mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 12:43:23 +00:00
Merge pull request #56130 from anguslees/kubeadm-nodehealth
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Make healthchecks skippable, and check masters only **What this PR does / why we need it**: Previously kubeadm would abort if _any_ node was not Ready. This is obviously infeasible in a non-trivial (esp. baremetal) cluster. This PR makes two changes: - Allows kubeadm healthchecks to be selectively skipped (made non-fatal) with `--ignore-checks-errors`. - Checks only that the *master* nodes are Ready. **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes kubernetes/kubeadm#539 **Special notes for your reviewer**: Builds on #56072 **Release note**: ```release-note kubeadm health checks can also be skipped with `--ignore-checks-errors` ```
This commit is contained in:
commit
58fca39de3
@ -119,7 +119,7 @@ func NewCmdApply(parentFlags *cmdUpgradeFlags) *cobra.Command {
|
|||||||
func RunApply(flags *applyFlags) error {
|
func RunApply(flags *applyFlags) error {
|
||||||
|
|
||||||
// Start with the basics, verify that the cluster is healthy and get the configuration from the cluster (using the ConfigMap)
|
// Start with the basics, verify that the cluster is healthy and get the configuration from the cluster (using the ConfigMap)
|
||||||
upgradeVars, err := enforceRequirements(flags.parent.featureGatesString, flags.parent.kubeConfigPath, flags.parent.cfgPath, flags.parent.printConfig, flags.dryRun)
|
upgradeVars, err := enforceRequirements(flags.parent.featureGatesString, flags.parent.kubeConfigPath, flags.parent.cfgPath, flags.parent.printConfig, flags.dryRun, flags.parent.ignorePreflightErrorsSet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -48,14 +48,14 @@ type upgradeVariables struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// enforceRequirements verifies that it's okay to upgrade and then returns the variables needed for the rest of the procedure
|
// enforceRequirements verifies that it's okay to upgrade and then returns the variables needed for the rest of the procedure
|
||||||
func enforceRequirements(featureGatesString, kubeConfigPath, cfgPath string, printConfig, dryRun bool) (*upgradeVariables, error) {
|
func enforceRequirements(featureGatesString, kubeConfigPath, cfgPath string, printConfig, dryRun bool, ignoreChecksErrors sets.String) (*upgradeVariables, error) {
|
||||||
client, err := getClient(kubeConfigPath, dryRun)
|
client, err := getClient(kubeConfigPath, dryRun)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("couldn't create a Kubernetes client from file %q: %v", kubeConfigPath, err)
|
return nil, fmt.Errorf("couldn't create a Kubernetes client from file %q: %v", kubeConfigPath, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run healthchecks against the cluster
|
// Run healthchecks against the cluster
|
||||||
if err := upgrade.CheckClusterHealth(client); err != nil {
|
if err := upgrade.CheckClusterHealth(client, ignoreChecksErrors); err != nil {
|
||||||
return nil, fmt.Errorf("[upgrade/health] FATAL: %v", err)
|
return nil, fmt.Errorf("[upgrade/health] FATAL: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ func NewCmdPlan(parentFlags *cmdUpgradeFlags) *cobra.Command {
|
|||||||
// RunPlan takes care of outputting available versions to upgrade to for the user
|
// RunPlan takes care of outputting available versions to upgrade to for the user
|
||||||
func RunPlan(parentFlags *cmdUpgradeFlags) error {
|
func RunPlan(parentFlags *cmdUpgradeFlags) error {
|
||||||
// Start with the basics, verify that the cluster is healthy, build a client and a versionGetter. Never set dry-run for plan.
|
// Start with the basics, verify that the cluster is healthy, build a client and a versionGetter. Never set dry-run for plan.
|
||||||
upgradeVars, err := enforceRequirements(parentFlags.featureGatesString, parentFlags.kubeConfigPath, parentFlags.cfgPath, parentFlags.printConfig, false)
|
upgradeVars, err := enforceRequirements(parentFlags.featureGatesString, parentFlags.kubeConfigPath, parentFlags.cfgPath, parentFlags.printConfig, false, parentFlags.ignorePreflightErrorsSet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,7 @@ go_library(
|
|||||||
"//cmd/kubeadm/app/phases/etcd:go_default_library",
|
"//cmd/kubeadm/app/phases/etcd:go_default_library",
|
||||||
"//cmd/kubeadm/app/phases/selfhosting:go_default_library",
|
"//cmd/kubeadm/app/phases/selfhosting:go_default_library",
|
||||||
"//cmd/kubeadm/app/phases/uploadconfig:go_default_library",
|
"//cmd/kubeadm/app/phases/uploadconfig:go_default_library",
|
||||||
|
"//cmd/kubeadm/app/preflight:go_default_library",
|
||||||
"//cmd/kubeadm/app/util:go_default_library",
|
"//cmd/kubeadm/app/util:go_default_library",
|
||||||
"//cmd/kubeadm/app/util/apiclient:go_default_library",
|
"//cmd/kubeadm/app/util/apiclient:go_default_library",
|
||||||
"//cmd/kubeadm/app/util/config:go_default_library",
|
"//cmd/kubeadm/app/util/config:go_default_library",
|
||||||
@ -42,8 +43,10 @@ go_library(
|
|||||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||||
|
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
|
||||||
|
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
|
||||||
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
|
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -24,73 +24,72 @@ import (
|
|||||||
apps "k8s.io/api/apps/v1beta2"
|
apps "k8s.io/api/apps/v1beta2"
|
||||||
"k8s.io/api/core/v1"
|
"k8s.io/api/core/v1"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/labels"
|
||||||
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||||
|
"k8s.io/kubernetes/cmd/kubeadm/app/preflight"
|
||||||
)
|
)
|
||||||
|
|
||||||
// healthCheck is a helper struct for easily performing healthchecks against the cluster and printing the output
|
// healthCheck is a helper struct for easily performing healthchecks against the cluster and printing the output
|
||||||
type healthCheck struct {
|
type healthCheck struct {
|
||||||
description, okMessage, failMessage string
|
name string
|
||||||
// f is invoked with a k8s client passed to it. Should return an optional warning and/or an error
|
client clientset.Interface
|
||||||
|
// f is invoked with a k8s client passed to it. Should return an optional error
|
||||||
f func(clientset.Interface) error
|
f func(clientset.Interface) error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check is part of the preflight.Checker interface
|
||||||
|
func (c *healthCheck) Check() (warnings, errors []error) {
|
||||||
|
if err := c.f(c.client); err != nil {
|
||||||
|
return nil, []error{err}
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name is part of the preflight.Checker interface
|
||||||
|
func (c *healthCheck) Name() string {
|
||||||
|
return c.name
|
||||||
|
}
|
||||||
|
|
||||||
// CheckClusterHealth makes sure:
|
// CheckClusterHealth makes sure:
|
||||||
// - the API /healthz endpoint is healthy
|
// - the API /healthz endpoint is healthy
|
||||||
// - all Nodes are Ready
|
// - all master Nodes are Ready
|
||||||
// - (if self-hosted) that there are DaemonSets with at least one Pod for all control plane components
|
// - (if self-hosted) that there are DaemonSets with at least one Pod for all control plane components
|
||||||
// - (if static pod-hosted) that all required Static Pod manifests exist on disk
|
// - (if static pod-hosted) that all required Static Pod manifests exist on disk
|
||||||
func CheckClusterHealth(client clientset.Interface) error {
|
func CheckClusterHealth(client clientset.Interface, ignoreChecksErrors sets.String) error {
|
||||||
fmt.Println("[upgrade] Making sure the cluster is healthy:")
|
fmt.Println("[upgrade] Making sure the cluster is healthy:")
|
||||||
|
|
||||||
healthChecks := []healthCheck{
|
healthChecks := []preflight.Checker{
|
||||||
{
|
&healthCheck{
|
||||||
description: "API Server health",
|
name: "APIServerHealth",
|
||||||
okMessage: "Healthy",
|
client: client,
|
||||||
failMessage: "Unhealthy",
|
f: apiServerHealthy,
|
||||||
f: apiServerHealthy,
|
|
||||||
},
|
},
|
||||||
{
|
&healthCheck{
|
||||||
description: "Node health",
|
name: "MasterNodesReady",
|
||||||
okMessage: "All Nodes are healthy",
|
client: client,
|
||||||
failMessage: "More than one Node unhealthy",
|
f: masterNodesReady,
|
||||||
f: nodesHealthy,
|
|
||||||
},
|
},
|
||||||
// TODO: Add a check for ComponentStatuses here?
|
// TODO: Add a check for ComponentStatuses here?
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run slightly different health checks depending on control plane hosting type
|
// Run slightly different health checks depending on control plane hosting type
|
||||||
if IsControlPlaneSelfHosted(client) {
|
if IsControlPlaneSelfHosted(client) {
|
||||||
healthChecks = append(healthChecks, healthCheck{
|
healthChecks = append(healthChecks, &healthCheck{
|
||||||
description: "Control plane DaemonSet health",
|
name: "ControlPlaneHealth",
|
||||||
okMessage: "All control plane DaemonSets are healthy",
|
client: client,
|
||||||
failMessage: "More than one control plane DaemonSet unhealthy",
|
f: controlPlaneHealth,
|
||||||
f: controlPlaneHealth,
|
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
healthChecks = append(healthChecks, healthCheck{
|
healthChecks = append(healthChecks, &healthCheck{
|
||||||
description: "Static Pod manifests exists on disk",
|
name: "StaticPodManifest",
|
||||||
okMessage: "All manifests exist on disk",
|
client: client,
|
||||||
failMessage: "Some manifests don't exist on disk",
|
f: staticPodManifestHealth,
|
||||||
f: staticPodManifestHealth,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return runHealthChecks(client, healthChecks)
|
return preflight.RunChecks(healthChecks, os.Stderr, ignoreChecksErrors)
|
||||||
}
|
|
||||||
|
|
||||||
// runHealthChecks runs a set of health checks against the cluster
|
|
||||||
func runHealthChecks(client clientset.Interface, healthChecks []healthCheck) error {
|
|
||||||
for _, check := range healthChecks {
|
|
||||||
|
|
||||||
err := check.f(client)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("[upgrade/health] Checking %s: %s\n", check.description, check.failMessage)
|
|
||||||
return fmt.Errorf("The cluster is not in an upgradeable state due to: %v", err)
|
|
||||||
}
|
|
||||||
fmt.Printf("[upgrade/health] Checking %s: %s\n", check.description, check.okMessage)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// apiServerHealthy checks whether the API server's /healthz endpoint is healthy
|
// apiServerHealthy checks whether the API server's /healthz endpoint is healthy
|
||||||
@ -108,16 +107,25 @@ func apiServerHealthy(client clientset.Interface) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// nodesHealthy checks whether all Nodes in the cluster are in the Running state
|
// masterNodesReady checks whether all master Nodes in the cluster are in the Ready state
|
||||||
func nodesHealthy(client clientset.Interface) error {
|
func masterNodesReady(client clientset.Interface) error {
|
||||||
nodes, err := client.CoreV1().Nodes().List(metav1.ListOptions{})
|
selector := labels.SelectorFromSet(labels.Set(map[string]string{
|
||||||
|
constants.LabelNodeRoleMaster: "",
|
||||||
|
}))
|
||||||
|
masters, err := client.CoreV1().Nodes().List(metav1.ListOptions{
|
||||||
|
LabelSelector: selector.String(),
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("couldn't list all nodes in cluster: %v", err)
|
return fmt.Errorf("couldn't list masters in cluster: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
notReadyNodes := getNotReadyNodes(nodes.Items)
|
if len(masters.Items) == 0 {
|
||||||
if len(notReadyNodes) != 0 {
|
return fmt.Errorf("failed to find any nodes with master role")
|
||||||
return fmt.Errorf("there are NotReady Nodes in the cluster: %v", notReadyNodes)
|
}
|
||||||
|
|
||||||
|
notReadyMasters := getNotReadyNodes(masters.Items)
|
||||||
|
if len(notReadyMasters) != 0 {
|
||||||
|
return fmt.Errorf("there are NotReady masters in the cluster: %v", notReadyMasters)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user