From 7db7222592ef1efa6c65617700ef22fa2289d71c Mon Sep 17 00:00:00 2001 From: "Lubomir I. Ivanov" Date: Tue, 6 Feb 2024 19:43:25 +0200 Subject: [PATCH] kubeadm: introduce the WaitForAllControlPlaneComponents feature gate WaitForAllControlPlaneComponents is a new feature gate that can be used to tell kubeadm to wait for all control plane components and not only kube-apiserver. - Add the Waiter function WaitForControlPlaneComponents that waits for all CP components in parallel. Uses the regular healthz endpoint for checks of status 200. - Add a new experimental phase to kubeadm join called "wait-control-plane". A similar phase exists for kubeadm init. --- cmd/kubeadm/app/cmd/join.go | 1 + .../app/cmd/phases/init/waitcontrolplane.go | 15 ++- cmd/kubeadm/app/cmd/phases/join/data.go | 1 + .../app/cmd/phases/join/waitcontrolplane.go | 87 +++++++++++++++++ cmd/kubeadm/app/features/features.go | 3 + .../app/phases/upgrade/staticpods_test.go | 5 + cmd/kubeadm/app/util/apiclient/wait.go | 96 +++++++++++++++++++ cmd/kubeadm/app/util/apiclient/wait_test.go | 78 +++++++++++++++ cmd/kubeadm/app/util/dryrun/dryrun.go | 6 ++ 9 files changed, 289 insertions(+), 3 deletions(-) create mode 100644 cmd/kubeadm/app/cmd/phases/join/waitcontrolplane.go create mode 100644 cmd/kubeadm/app/util/apiclient/wait_test.go diff --git a/cmd/kubeadm/app/cmd/join.go b/cmd/kubeadm/app/cmd/join.go index 9ddef2d4aa4..f0e651140b0 100644 --- a/cmd/kubeadm/app/cmd/join.go +++ b/cmd/kubeadm/app/cmd/join.go @@ -220,6 +220,7 @@ func newCmdJoin(out io.Writer, joinOptions *joinOptions) *cobra.Command { joinRunner.AppendPhase(phases.NewCheckEtcdPhase()) joinRunner.AppendPhase(phases.NewKubeletStartPhase()) joinRunner.AppendPhase(phases.NewControlPlaneJoinPhase()) + joinRunner.AppendPhase(phases.NewWaitControlPlanePhase()) // sets the data builder function, that will be used by the runner // both when running the entire workflow or single phases diff --git a/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go b/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go index 8b746f288c3..cfa5ff901d7 100644 --- a/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go +++ b/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go @@ -28,6 +28,7 @@ import ( clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/cmd/kubeadm/app/cmd/phases/workflow" + "k8s.io/kubernetes/cmd/kubeadm/app/features" "k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient" dryrunutil "k8s.io/kubernetes/cmd/kubeadm/app/util/dryrun" ) @@ -57,9 +58,12 @@ var ( // NewWaitControlPlanePhase is a hidden phase that runs after the control-plane and etcd phases func NewWaitControlPlanePhase() workflow.Phase { phase := workflow.Phase{ - Name: "wait-control-plane", - Run: runWaitControlPlanePhase, + Name: "wait-control-plane", + Short: "Wait for the control plane to start", + // TODO: unhide this phase once WaitForAllControlPlaneComponents goes GA: + // https://github.com/kubernetes/kubeadm/issues/2907 Hidden: true, + Run: runWaitControlPlanePhase, } return phase } @@ -112,7 +116,12 @@ func runWaitControlPlanePhase(c workflow.RunData) error { } waiter.SetTimeout(data.Cfg().Timeouts.ControlPlaneComponentHealthCheck.Duration) - if err := waiter.WaitForAPI(); err != nil { + if features.Enabled(data.Cfg().ClusterConfiguration.FeatureGates, features.WaitForAllControlPlaneComponents) { + err = waiter.WaitForControlPlaneComponents(&data.Cfg().ClusterConfiguration) + } else { + err = waiter.WaitForAPI() + } + if err != nil { return handleError(err) } diff --git a/cmd/kubeadm/app/cmd/phases/join/data.go b/cmd/kubeadm/app/cmd/phases/join/data.go index 773caba2dd1..bbfb8730cbe 100644 --- a/cmd/kubeadm/app/cmd/phases/join/data.go +++ b/cmd/kubeadm/app/cmd/phases/join/data.go @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ +// Package phases includes command line phases for kubeadm join package phases import ( diff --git a/cmd/kubeadm/app/cmd/phases/join/waitcontrolplane.go b/cmd/kubeadm/app/cmd/phases/join/waitcontrolplane.go new file mode 100644 index 00000000000..42a4a109aa9 --- /dev/null +++ b/cmd/kubeadm/app/cmd/phases/join/waitcontrolplane.go @@ -0,0 +1,87 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package phases + +import ( + "io" + "time" + + "github.com/pkg/errors" + + clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + + "k8s.io/kubernetes/cmd/kubeadm/app/cmd/phases/workflow" + "k8s.io/kubernetes/cmd/kubeadm/app/features" + "k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient" + dryrunutil "k8s.io/kubernetes/cmd/kubeadm/app/util/dryrun" +) + +// NewWaitControlPlanePhase is a hidden phase that runs after the control-plane and etcd phases +func NewWaitControlPlanePhase() workflow.Phase { + phase := workflow.Phase{ + Name: "wait-control-plane", + // TODO: remove this EXPERIMENTAL prefix once WaitForAllControlPlaneComponents goes GA: + // https://github.com/kubernetes/kubeadm/issues/2907 + Short: "EXPERIMENTAL: Wait for the control plane to start", + Run: runWaitControlPlanePhase, + } + return phase +} + +func runWaitControlPlanePhase(c workflow.RunData) error { + data, ok := c.(JoinData) + if !ok { + return errors.New("wait-control-plane phase invoked with an invalid data struct") + } + + if data.Cfg().ControlPlane == nil { + return nil + } + + initCfg, err := data.InitCfg() + if err != nil { + return errors.Wrap(err, "could not obtain InitConfiguration during the wait-control-plane phase") + } + + // TODO: remove this check once WaitForAllControlPlaneComponents goes GA + // https://github.com/kubernetes/kubeadm/issues/2907 + if !features.Enabled(initCfg.ClusterConfiguration.FeatureGates, features.WaitForAllControlPlaneComponents) { + klog.V(5).Infof("[wait-control-plane] Skipping phase as the feature gate WaitForAllControlPlaneComponents is disabled") + return nil + } + + waiter, err := newControlPlaneWaiter(data.DryRun(), 0, nil, data.OutputWriter()) + if err != nil { + return errors.Wrap(err, "error creating waiter") + } + + waiter.SetTimeout(data.Cfg().Timeouts.ControlPlaneComponentHealthCheck.Duration) + if err := waiter.WaitForControlPlaneComponents(&initCfg.ClusterConfiguration); err != nil { + return err + } + + return nil +} + +// newControlPlaneWaiter returns a new waiter that is used to wait on the control plane to boot up. +func newControlPlaneWaiter(dryRun bool, timeout time.Duration, client clientset.Interface, out io.Writer) (apiclient.Waiter, error) { + if dryRun { + return dryrunutil.NewWaiter(), nil + } + return apiclient.NewKubeWaiter(client, timeout, out), nil +} diff --git a/cmd/kubeadm/app/features/features.go b/cmd/kubeadm/app/features/features.go index 25f4f263c7d..f3c28bb396f 100644 --- a/cmd/kubeadm/app/features/features.go +++ b/cmd/kubeadm/app/features/features.go @@ -38,6 +38,8 @@ const ( EtcdLearnerMode = "EtcdLearnerMode" // UpgradeAddonsBeforeControlPlane is expected to be in deprecated in v1.28 and will be removed in future release UpgradeAddonsBeforeControlPlane = "UpgradeAddonsBeforeControlPlane" + // WaitForAllControlPlaneComponents is expected to be alpha in v1.30 + WaitForAllControlPlaneComponents = "WaitForAllControlPlaneComponents" ) // InitFeatureGates are the default feature gates for the init command @@ -53,6 +55,7 @@ var InitFeatureGates = FeatureList{ FeatureSpec: featuregate.FeatureSpec{Default: false, PreRelease: featuregate.Deprecated}, DeprecationMessage: "The UpgradeAddonsBeforeControlPlane feature gate is deprecated and will be removed in a future release.", }, + WaitForAllControlPlaneComponents: {FeatureSpec: featuregate.FeatureSpec{Default: false, PreRelease: featuregate.Alpha}}, } // Feature represents a feature being gated diff --git a/cmd/kubeadm/app/phases/upgrade/staticpods_test.go b/cmd/kubeadm/app/phases/upgrade/staticpods_test.go index 94ba22b66ba..0c90b03071f 100644 --- a/cmd/kubeadm/app/phases/upgrade/staticpods_test.go +++ b/cmd/kubeadm/app/phases/upgrade/staticpods_test.go @@ -98,6 +98,11 @@ func NewFakeStaticPodWaiter(errsToReturn map[string]error) apiclient.Waiter { } } +// WaitForControlPlaneComponents just returns a dummy nil, to indicate that the program should just proceed +func (w *fakeWaiter) WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error { + return nil +} + // WaitForAPI just returns a dummy nil, to indicate that the program should just proceed func (w *fakeWaiter) WaitForAPI() error { return nil diff --git a/cmd/kubeadm/app/util/apiclient/wait.go b/cmd/kubeadm/app/util/apiclient/wait.go index da122b553dd..8515af16462 100644 --- a/cmd/kubeadm/app/util/apiclient/wait.go +++ b/cmd/kubeadm/app/util/apiclient/wait.go @@ -18,6 +18,7 @@ package apiclient import ( "context" + "crypto/tls" "fmt" "io" "net/http" @@ -28,16 +29,22 @@ import ( v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" netutil "k8s.io/apimachinery/pkg/util/net" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" + kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm" "k8s.io/kubernetes/cmd/kubeadm/app/constants" ) // Waiter is an interface for waiting for criteria in Kubernetes to happen type Waiter interface { + // WaitForControlPlaneComponents waits for all control plane components to report "ok" on /healthz + WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error // WaitForAPI waits for the API Server's /healthz endpoint to become "ok" + // TODO: remove WaitForAPI once WaitForAllControlPlaneComponents goes GA: + // https://github.com/kubernetes/kubeadm/issues/2907 WaitForAPI() error // WaitForPodsWithLabel waits for Pods in the kube-system namespace to become Ready WaitForPodsWithLabel(kvLabel string) error @@ -72,6 +79,95 @@ func NewKubeWaiter(client clientset.Interface, timeout time.Duration, writer io. } } +type controlPlaneComponent struct { + name string + url string +} + +// getControlPlaneComponents takes a ClusterConfiguration and returns a slice of +// control plane components and their secure ports. +func getControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) []controlPlaneComponent { + portArg := "secure-port" + portAPIServer, idx := kubeadmapi.GetArgValue(cfg.APIServer.ExtraArgs, portArg, -1) + if idx == -1 { + portAPIServer = "6443" + } + portKCM, idx := kubeadmapi.GetArgValue(cfg.ControllerManager.ExtraArgs, portArg, -1) + if idx == -1 { + portKCM = "10257" + } + portScheduler, idx := kubeadmapi.GetArgValue(cfg.Scheduler.ExtraArgs, portArg, -1) + if idx == -1 { + portScheduler = "10259" + } + urlFormat := "https://127.0.0.1:%s/healthz" + return []controlPlaneComponent{ + {name: "kube-apiserver", url: fmt.Sprintf(urlFormat, portAPIServer)}, + {name: "kube-controller-manager", url: fmt.Sprintf(urlFormat, portKCM)}, + {name: "kube-scheduler", url: fmt.Sprintf(urlFormat, portScheduler)}, + } +} + +// WaitForControlPlaneComponents waits for all control plane components to report "ok" on /healthz +func (w *KubeWaiter) WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error { + fmt.Printf("[control-plane-check] Waiting for healthy control plane components."+ + " This can take up to %v\n", w.timeout) + + components := getControlPlaneComponents(cfg) + + var errs []error + errChan := make(chan error, len(components)) + + for _, comp := range components { + fmt.Printf("[control-plane-check] Checking %s at %s\n", comp.name, comp.url) + + go func(comp controlPlaneComponent) { + tr := &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + client := &http.Client{Transport: tr} + start := time.Now() + var lastError error + + err := wait.PollUntilContextTimeout( + context.Background(), + constants.KubernetesAPICallRetryInterval, + w.timeout, + true, func(ctx context.Context) (bool, error) { + resp, err := client.Get(comp.url) + if err != nil { + lastError = errors.WithMessagef(err, "%s /healthz check failed", comp.name) + return false, nil + } + + defer func() { + _ = resp.Body.Close() + }() + if resp.StatusCode != http.StatusOK { + lastError = errors.Errorf("%s /healthz check failed with status: %d", comp.name, resp.StatusCode) + return false, nil + } + + return true, nil + }) + if err != nil { + fmt.Printf("[control-plane-check] %s is not healthy after %v\n", comp.name, time.Since(start)) + errChan <- lastError + return + } + fmt.Printf("[control-plane-check] %s is healthy after %v\n", comp.name, time.Since(start)) + errChan <- nil + }(comp) + } + + for i := 0; i < len(components); i++ { + if err := <-errChan; err != nil { + errs = append(errs, err) + } + } + return utilerrors.NewAggregate(errs) +} + // WaitForAPI waits for the API Server's /healthz endpoint to report "ok" func (w *KubeWaiter) WaitForAPI() error { fmt.Printf("[api-check] Waiting for a healthy API server. This can take up to %v\n", w.timeout) diff --git a/cmd/kubeadm/app/util/apiclient/wait_test.go b/cmd/kubeadm/app/util/apiclient/wait_test.go new file mode 100644 index 00000000000..a5e50c41af0 --- /dev/null +++ b/cmd/kubeadm/app/util/apiclient/wait_test.go @@ -0,0 +1,78 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apiclient + +import ( + "reflect" + "testing" + + kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm" +) + +func TestGetControlPlaneComponents(t *testing.T) { + testcases := []struct { + name string + cfg *kubeadmapi.ClusterConfiguration + expected []controlPlaneComponent + }{ + { + name: "port values from config", + cfg: &kubeadmapi.ClusterConfiguration{ + APIServer: kubeadmapi.APIServer{ + ControlPlaneComponent: kubeadmapi.ControlPlaneComponent{ + ExtraArgs: []kubeadmapi.Arg{ + {Name: "secure-port", Value: "1111"}, + }, + }, + }, + ControllerManager: kubeadmapi.ControlPlaneComponent{ + ExtraArgs: []kubeadmapi.Arg{ + {Name: "secure-port", Value: "2222"}, + }, + }, + Scheduler: kubeadmapi.ControlPlaneComponent{ + ExtraArgs: []kubeadmapi.Arg{ + {Name: "secure-port", Value: "3333"}, + }, + }, + }, + expected: []controlPlaneComponent{ + {name: "kube-apiserver", url: "https://127.0.0.1:1111/healthz"}, + {name: "kube-controller-manager", url: "https://127.0.0.1:2222/healthz"}, + {name: "kube-scheduler", url: "https://127.0.0.1:3333/healthz"}, + }, + }, + { + name: "default ports", + cfg: &kubeadmapi.ClusterConfiguration{}, + expected: []controlPlaneComponent{ + {name: "kube-apiserver", url: "https://127.0.0.1:6443/healthz"}, + {name: "kube-controller-manager", url: "https://127.0.0.1:10257/healthz"}, + {name: "kube-scheduler", url: "https://127.0.0.1:10259/healthz"}, + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + actual := getControlPlaneComponents(tc.cfg) + if !reflect.DeepEqual(tc.expected, actual) { + t.Fatalf("expected result: %+v, got: %+v", tc.expected, actual) + } + }) + } +} diff --git a/cmd/kubeadm/app/util/dryrun/dryrun.go b/cmd/kubeadm/app/util/dryrun/dryrun.go index b1c4e7a4712..ce8d54c8a3d 100644 --- a/cmd/kubeadm/app/util/dryrun/dryrun.go +++ b/cmd/kubeadm/app/util/dryrun/dryrun.go @@ -26,6 +26,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" errorsutil "k8s.io/apimachinery/pkg/util/errors" + kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm" kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants" "k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient" ) @@ -88,6 +89,11 @@ func NewWaiter() apiclient.Waiter { return &Waiter{} } +// WaitForControlPlaneComponents just returns a dummy nil, to indicate that the program should just proceed +func (w *Waiter) WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error { + return nil +} + // WaitForAPI just returns a dummy nil, to indicate that the program should just proceed func (w *Waiter) WaitForAPI() error { fmt.Println("[dryrun] Would wait for the API Server's /healthz endpoint to return 'ok'")