Merge pull request #123341 from neolit123/1.30-health-check-all-cp-components

kubeadm: introduce the WaitForAllControlPlaneComponents feature gate
This commit is contained in:
Kubernetes Prow Robot 2024-02-29 05:05:42 -08:00 committed by GitHub
commit 03f24068da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 289 additions and 3 deletions

View File

@ -220,6 +220,7 @@ func newCmdJoin(out io.Writer, joinOptions *joinOptions) *cobra.Command {
joinRunner.AppendPhase(phases.NewCheckEtcdPhase())
joinRunner.AppendPhase(phases.NewKubeletStartPhase())
joinRunner.AppendPhase(phases.NewControlPlaneJoinPhase())
joinRunner.AppendPhase(phases.NewWaitControlPlanePhase())
// sets the data builder function, that will be used by the runner
// both when running the entire workflow or single phases

View File

@ -28,6 +28,7 @@ import (
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/cmd/kubeadm/app/cmd/phases/workflow"
"k8s.io/kubernetes/cmd/kubeadm/app/features"
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
dryrunutil "k8s.io/kubernetes/cmd/kubeadm/app/util/dryrun"
)
@ -57,9 +58,12 @@ var (
// NewWaitControlPlanePhase is a hidden phase that runs after the control-plane and etcd phases
func NewWaitControlPlanePhase() workflow.Phase {
phase := workflow.Phase{
Name: "wait-control-plane",
Run: runWaitControlPlanePhase,
Name: "wait-control-plane",
Short: "Wait for the control plane to start",
// TODO: unhide this phase once WaitForAllControlPlaneComponents goes GA:
// https://github.com/kubernetes/kubeadm/issues/2907
Hidden: true,
Run: runWaitControlPlanePhase,
}
return phase
}
@ -112,7 +116,12 @@ func runWaitControlPlanePhase(c workflow.RunData) error {
}
waiter.SetTimeout(data.Cfg().Timeouts.ControlPlaneComponentHealthCheck.Duration)
if err := waiter.WaitForAPI(); err != nil {
if features.Enabled(data.Cfg().ClusterConfiguration.FeatureGates, features.WaitForAllControlPlaneComponents) {
err = waiter.WaitForControlPlaneComponents(&data.Cfg().ClusterConfiguration)
} else {
err = waiter.WaitForAPI()
}
if err != nil {
return handleError(err)
}

View File

@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
// Package phases includes command line phases for kubeadm join
package phases
import (

View File

@ -0,0 +1,87 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package phases
import (
"io"
"time"
"github.com/pkg/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"k8s.io/kubernetes/cmd/kubeadm/app/cmd/phases/workflow"
"k8s.io/kubernetes/cmd/kubeadm/app/features"
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
dryrunutil "k8s.io/kubernetes/cmd/kubeadm/app/util/dryrun"
)
// NewWaitControlPlanePhase is a hidden phase that runs after the control-plane and etcd phases
func NewWaitControlPlanePhase() workflow.Phase {
phase := workflow.Phase{
Name: "wait-control-plane",
// TODO: remove this EXPERIMENTAL prefix once WaitForAllControlPlaneComponents goes GA:
// https://github.com/kubernetes/kubeadm/issues/2907
Short: "EXPERIMENTAL: Wait for the control plane to start",
Run: runWaitControlPlanePhase,
}
return phase
}
func runWaitControlPlanePhase(c workflow.RunData) error {
data, ok := c.(JoinData)
if !ok {
return errors.New("wait-control-plane phase invoked with an invalid data struct")
}
if data.Cfg().ControlPlane == nil {
return nil
}
initCfg, err := data.InitCfg()
if err != nil {
return errors.Wrap(err, "could not obtain InitConfiguration during the wait-control-plane phase")
}
// TODO: remove this check once WaitForAllControlPlaneComponents goes GA
// https://github.com/kubernetes/kubeadm/issues/2907
if !features.Enabled(initCfg.ClusterConfiguration.FeatureGates, features.WaitForAllControlPlaneComponents) {
klog.V(5).Infof("[wait-control-plane] Skipping phase as the feature gate WaitForAllControlPlaneComponents is disabled")
return nil
}
waiter, err := newControlPlaneWaiter(data.DryRun(), 0, nil, data.OutputWriter())
if err != nil {
return errors.Wrap(err, "error creating waiter")
}
waiter.SetTimeout(data.Cfg().Timeouts.ControlPlaneComponentHealthCheck.Duration)
if err := waiter.WaitForControlPlaneComponents(&initCfg.ClusterConfiguration); err != nil {
return err
}
return nil
}
// newControlPlaneWaiter returns a new waiter that is used to wait on the control plane to boot up.
func newControlPlaneWaiter(dryRun bool, timeout time.Duration, client clientset.Interface, out io.Writer) (apiclient.Waiter, error) {
if dryRun {
return dryrunutil.NewWaiter(), nil
}
return apiclient.NewKubeWaiter(client, timeout, out), nil
}

View File

@ -38,6 +38,8 @@ const (
EtcdLearnerMode = "EtcdLearnerMode"
// UpgradeAddonsBeforeControlPlane is expected to be in deprecated in v1.28 and will be removed in future release
UpgradeAddonsBeforeControlPlane = "UpgradeAddonsBeforeControlPlane"
// WaitForAllControlPlaneComponents is expected to be alpha in v1.30
WaitForAllControlPlaneComponents = "WaitForAllControlPlaneComponents"
)
// InitFeatureGates are the default feature gates for the init command
@ -53,6 +55,7 @@ var InitFeatureGates = FeatureList{
FeatureSpec: featuregate.FeatureSpec{Default: false, PreRelease: featuregate.Deprecated},
DeprecationMessage: "The UpgradeAddonsBeforeControlPlane feature gate is deprecated and will be removed in a future release.",
},
WaitForAllControlPlaneComponents: {FeatureSpec: featuregate.FeatureSpec{Default: false, PreRelease: featuregate.Alpha}},
}
// Feature represents a feature being gated

View File

@ -98,6 +98,11 @@ func NewFakeStaticPodWaiter(errsToReturn map[string]error) apiclient.Waiter {
}
}
// WaitForControlPlaneComponents just returns a dummy nil, to indicate that the program should just proceed
func (w *fakeWaiter) WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error {
return nil
}
// WaitForAPI just returns a dummy nil, to indicate that the program should just proceed
func (w *fakeWaiter) WaitForAPI() error {
return nil

View File

@ -18,6 +18,7 @@ package apiclient
import (
"context"
"crypto/tls"
"fmt"
"io"
"net/http"
@ -28,16 +29,22 @@ import (
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
netutil "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
)
// Waiter is an interface for waiting for criteria in Kubernetes to happen
type Waiter interface {
// WaitForControlPlaneComponents waits for all control plane components to report "ok" on /healthz
WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error
// WaitForAPI waits for the API Server's /healthz endpoint to become "ok"
// TODO: remove WaitForAPI once WaitForAllControlPlaneComponents goes GA:
// https://github.com/kubernetes/kubeadm/issues/2907
WaitForAPI() error
// WaitForPodsWithLabel waits for Pods in the kube-system namespace to become Ready
WaitForPodsWithLabel(kvLabel string) error
@ -72,6 +79,95 @@ func NewKubeWaiter(client clientset.Interface, timeout time.Duration, writer io.
}
}
type controlPlaneComponent struct {
name string
url string
}
// getControlPlaneComponents takes a ClusterConfiguration and returns a slice of
// control plane components and their secure ports.
func getControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) []controlPlaneComponent {
portArg := "secure-port"
portAPIServer, idx := kubeadmapi.GetArgValue(cfg.APIServer.ExtraArgs, portArg, -1)
if idx == -1 {
portAPIServer = "6443"
}
portKCM, idx := kubeadmapi.GetArgValue(cfg.ControllerManager.ExtraArgs, portArg, -1)
if idx == -1 {
portKCM = "10257"
}
portScheduler, idx := kubeadmapi.GetArgValue(cfg.Scheduler.ExtraArgs, portArg, -1)
if idx == -1 {
portScheduler = "10259"
}
urlFormat := "https://127.0.0.1:%s/healthz"
return []controlPlaneComponent{
{name: "kube-apiserver", url: fmt.Sprintf(urlFormat, portAPIServer)},
{name: "kube-controller-manager", url: fmt.Sprintf(urlFormat, portKCM)},
{name: "kube-scheduler", url: fmt.Sprintf(urlFormat, portScheduler)},
}
}
// WaitForControlPlaneComponents waits for all control plane components to report "ok" on /healthz
func (w *KubeWaiter) WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error {
fmt.Printf("[control-plane-check] Waiting for healthy control plane components."+
" This can take up to %v\n", w.timeout)
components := getControlPlaneComponents(cfg)
var errs []error
errChan := make(chan error, len(components))
for _, comp := range components {
fmt.Printf("[control-plane-check] Checking %s at %s\n", comp.name, comp.url)
go func(comp controlPlaneComponent) {
tr := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
client := &http.Client{Transport: tr}
start := time.Now()
var lastError error
err := wait.PollUntilContextTimeout(
context.Background(),
constants.KubernetesAPICallRetryInterval,
w.timeout,
true, func(ctx context.Context) (bool, error) {
resp, err := client.Get(comp.url)
if err != nil {
lastError = errors.WithMessagef(err, "%s /healthz check failed", comp.name)
return false, nil
}
defer func() {
_ = resp.Body.Close()
}()
if resp.StatusCode != http.StatusOK {
lastError = errors.Errorf("%s /healthz check failed with status: %d", comp.name, resp.StatusCode)
return false, nil
}
return true, nil
})
if err != nil {
fmt.Printf("[control-plane-check] %s is not healthy after %v\n", comp.name, time.Since(start))
errChan <- lastError
return
}
fmt.Printf("[control-plane-check] %s is healthy after %v\n", comp.name, time.Since(start))
errChan <- nil
}(comp)
}
for i := 0; i < len(components); i++ {
if err := <-errChan; err != nil {
errs = append(errs, err)
}
}
return utilerrors.NewAggregate(errs)
}
// WaitForAPI waits for the API Server's /healthz endpoint to report "ok"
func (w *KubeWaiter) WaitForAPI() error {
fmt.Printf("[api-check] Waiting for a healthy API server. This can take up to %v\n", w.timeout)

View File

@ -0,0 +1,78 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apiclient
import (
"reflect"
"testing"
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
)
func TestGetControlPlaneComponents(t *testing.T) {
testcases := []struct {
name string
cfg *kubeadmapi.ClusterConfiguration
expected []controlPlaneComponent
}{
{
name: "port values from config",
cfg: &kubeadmapi.ClusterConfiguration{
APIServer: kubeadmapi.APIServer{
ControlPlaneComponent: kubeadmapi.ControlPlaneComponent{
ExtraArgs: []kubeadmapi.Arg{
{Name: "secure-port", Value: "1111"},
},
},
},
ControllerManager: kubeadmapi.ControlPlaneComponent{
ExtraArgs: []kubeadmapi.Arg{
{Name: "secure-port", Value: "2222"},
},
},
Scheduler: kubeadmapi.ControlPlaneComponent{
ExtraArgs: []kubeadmapi.Arg{
{Name: "secure-port", Value: "3333"},
},
},
},
expected: []controlPlaneComponent{
{name: "kube-apiserver", url: "https://127.0.0.1:1111/healthz"},
{name: "kube-controller-manager", url: "https://127.0.0.1:2222/healthz"},
{name: "kube-scheduler", url: "https://127.0.0.1:3333/healthz"},
},
},
{
name: "default ports",
cfg: &kubeadmapi.ClusterConfiguration{},
expected: []controlPlaneComponent{
{name: "kube-apiserver", url: "https://127.0.0.1:6443/healthz"},
{name: "kube-controller-manager", url: "https://127.0.0.1:10257/healthz"},
{name: "kube-scheduler", url: "https://127.0.0.1:10259/healthz"},
},
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
actual := getControlPlaneComponents(tc.cfg)
if !reflect.DeepEqual(tc.expected, actual) {
t.Fatalf("expected result: %+v, got: %+v", tc.expected, actual)
}
})
}
}

View File

@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errorsutil "k8s.io/apimachinery/pkg/util/errors"
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
)
@ -88,6 +89,11 @@ func NewWaiter() apiclient.Waiter {
return &Waiter{}
}
// WaitForControlPlaneComponents just returns a dummy nil, to indicate that the program should just proceed
func (w *Waiter) WaitForControlPlaneComponents(cfg *kubeadmapi.ClusterConfiguration) error {
return nil
}
// WaitForAPI just returns a dummy nil, to indicate that the program should just proceed
func (w *Waiter) WaitForAPI() error {
fmt.Println("[dryrun] Would wait for the API Server's /healthz endpoint to return 'ok'")