diff --git a/cmd/kubeadm/app/cmd/upgrade/apply.go b/cmd/kubeadm/app/cmd/upgrade/apply.go
index e6a24c5440d..fdb4f4f240b 100644
--- a/cmd/kubeadm/app/cmd/upgrade/apply.go
+++ b/cmd/kubeadm/app/cmd/upgrade/apply.go
@@ -119,7 +119,7 @@ func NewCmdApply(parentFlags *cmdUpgradeFlags) *cobra.Command {
 func RunApply(flags *applyFlags) error {
 
 	// Start with the basics, verify that the cluster is healthy and get the configuration from the cluster (using the ConfigMap)
-	upgradeVars, err := enforceRequirements(flags.parent.featureGatesString, flags.parent.kubeConfigPath, flags.parent.cfgPath, flags.parent.printConfig, flags.dryRun)
+	upgradeVars, err := enforceRequirements(flags.parent.featureGatesString, flags.parent.kubeConfigPath, flags.parent.cfgPath, flags.parent.printConfig, flags.dryRun, flags.parent.ignorePreflightErrorsSet)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/kubeadm/app/cmd/upgrade/common.go b/cmd/kubeadm/app/cmd/upgrade/common.go
index 1cadd65940c..2fbbf08f510 100644
--- a/cmd/kubeadm/app/cmd/upgrade/common.go
+++ b/cmd/kubeadm/app/cmd/upgrade/common.go
@@ -48,14 +48,14 @@ type upgradeVariables struct {
 }
 
 // enforceRequirements verifies that it's okay to upgrade and then returns the variables needed for the rest of the procedure
-func enforceRequirements(featureGatesString, kubeConfigPath, cfgPath string, printConfig, dryRun bool) (*upgradeVariables, error) {
+func enforceRequirements(featureGatesString, kubeConfigPath, cfgPath string, printConfig, dryRun bool, ignoreChecksErrors sets.String) (*upgradeVariables, error) {
 	client, err := getClient(kubeConfigPath, dryRun)
 	if err != nil {
 		return nil, fmt.Errorf("couldn't create a Kubernetes client from file %q: %v", kubeConfigPath, err)
 	}
 
 	// Run healthchecks against the cluster
-	if err := upgrade.CheckClusterHealth(client); err != nil {
+	if err := upgrade.CheckClusterHealth(client, ignoreChecksErrors); err != nil {
 		return nil, fmt.Errorf("[upgrade/health] FATAL: %v", err)
 	}
 
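For context: enforceRequirements now threads ignoreChecksErrors (a sets.String of check names) through to upgrade.CheckClusterHealth. A minimal sketch of how a caller might assemble that set from flag values; the helper name and the lower-casing are assumptions about how kubeadm normalizes --ignore-preflight-errors values, not part of this diff:

package main

import (
	"strings"

	"k8s.io/apimachinery/pkg/util/sets"
)

// buildIgnoreSet is a hypothetical helper: it lower-cases the raw flag
// values so that check names can later be matched case-insensitively.
func buildIgnoreSet(flagValues []string) sets.String {
	ignoreSet := sets.NewString()
	for _, value := range flagValues {
		ignoreSet.Insert(strings.ToLower(value))
	}
	return ignoreSet
}
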
diff --git a/cmd/kubeadm/app/cmd/upgrade/plan.go b/cmd/kubeadm/app/cmd/upgrade/plan.go
index 6d7cee8317e..61e0b4376a6 100644
--- a/cmd/kubeadm/app/cmd/upgrade/plan.go
+++ b/cmd/kubeadm/app/cmd/upgrade/plan.go
@@ -55,7 +55,7 @@ func NewCmdPlan(parentFlags *cmdUpgradeFlags) *cobra.Command {
 // RunPlan takes care of outputting available versions to upgrade to for the user
 func RunPlan(parentFlags *cmdUpgradeFlags) error {
 	// Start with the basics, verify that the cluster is healthy, build a client and a versionGetter. Never set dry-run for plan.
-	upgradeVars, err := enforceRequirements(parentFlags.featureGatesString, parentFlags.kubeConfigPath, parentFlags.cfgPath, parentFlags.printConfig, false)
+	upgradeVars, err := enforceRequirements(parentFlags.featureGatesString, parentFlags.kubeConfigPath, parentFlags.cfgPath, parentFlags.printConfig, false, parentFlags.ignorePreflightErrorsSet)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/kubeadm/app/phases/upgrade/BUILD b/cmd/kubeadm/app/phases/upgrade/BUILD
index 02c1243eb37..907707d127e 100644
--- a/cmd/kubeadm/app/phases/upgrade/BUILD
+++ b/cmd/kubeadm/app/phases/upgrade/BUILD
@@ -32,6 +32,7 @@ go_library(
         "//cmd/kubeadm/app/phases/etcd:go_default_library",
         "//cmd/kubeadm/app/phases/selfhosting:go_default_library",
         "//cmd/kubeadm/app/phases/uploadconfig:go_default_library",
+        "//cmd/kubeadm/app/preflight:go_default_library",
         "//cmd/kubeadm/app/util:go_default_library",
         "//cmd/kubeadm/app/util/apiclient:go_default_library",
         "//cmd/kubeadm/app/util/config:go_default_library",
@@ -42,8 +43,10 @@ go_library(
         "//vendor/k8s.io/api/core/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
         "//vendor/k8s.io/client-go/kubernetes:go_default_library",
     ],
 )
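The health.go changes below turn the old description/okMessage/failMessage struct into an adapter that satisfies kubeadm's preflight.Checker interface; its shape, Check() (warnings, errors []error) plus Name() string, is visible in the methods added below. A standalone sketch of the same adapter pattern, with illustrative names:

package main

// Checker mirrors the shape of kubeadm's preflight.Checker interface.
type Checker interface {
	Check() (warnings, errors []error)
	Name() string
}

// funcCheck is an illustrative adapter: it wraps a plain error-returning
// function so it can be run alongside other Checker implementations.
type funcCheck struct {
	name string
	f    func() error
}

// Check reports the wrapped function's failure as a single error and
// never emits warnings of its own.
func (c *funcCheck) Check() (warnings, errors []error) {
	if err := c.f(); err != nil {
		return nil, []error{err}
	}
	return nil, nil
}

// Name identifies the check, e.g. for matching against an ignore list.
func (c *funcCheck) Name() string {
	return c.name
}
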
diff --git a/cmd/kubeadm/app/phases/upgrade/health.go b/cmd/kubeadm/app/phases/upgrade/health.go
index 719eced4f30..2ce9314fac9 100644
--- a/cmd/kubeadm/app/phases/upgrade/health.go
+++ b/cmd/kubeadm/app/phases/upgrade/health.go
@@ -24,73 +24,72 @@ import (
 	apps "k8s.io/api/apps/v1beta2"
 	"k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/sets"
 	clientset "k8s.io/client-go/kubernetes"
 
 	"k8s.io/kubernetes/cmd/kubeadm/app/constants"
+	"k8s.io/kubernetes/cmd/kubeadm/app/preflight"
 )
 
 // healthCheck is a helper struct for easily performing healthchecks against the cluster and printing the output
 type healthCheck struct {
-	description, okMessage, failMessage string
-	// f is invoked with a k8s client passed to it. Should return an optional warning and/or an error
+	name   string
+	client clientset.Interface
+	// f is invoked with a k8s client passed to it. Should return an optional error
 	f func(clientset.Interface) error
 }
 
+// Check is part of the preflight.Checker interface
+func (c *healthCheck) Check() (warnings, errors []error) {
+	if err := c.f(c.client); err != nil {
+		return nil, []error{err}
+	}
+	return nil, nil
+}
+
+// Name is part of the preflight.Checker interface
+func (c *healthCheck) Name() string {
+	return c.name
+}
+
 // CheckClusterHealth makes sure:
 // - the API /healthz endpoint is healthy
-// - all Nodes are Ready
+// - all master Nodes are Ready
 // - (if self-hosted) that there are DaemonSets with at least one Pod for all control plane components
 // - (if static pod-hosted) that all required Static Pod manifests exist on disk
-func CheckClusterHealth(client clientset.Interface) error {
+func CheckClusterHealth(client clientset.Interface, ignoreChecksErrors sets.String) error {
 	fmt.Println("[upgrade] Making sure the cluster is healthy:")
 
-	healthChecks := []healthCheck{
-		{
-			description: "API Server health",
-			okMessage:   "Healthy",
-			failMessage: "Unhealthy",
-			f:           apiServerHealthy,
+	healthChecks := []preflight.Checker{
+		&healthCheck{
+			name:   "APIServerHealth",
+			client: client,
+			f:      apiServerHealthy,
 		},
-		{
-			description: "Node health",
-			okMessage:   "All Nodes are healthy",
-			failMessage: "More than one Node unhealthy",
-			f:           nodesHealthy,
+		&healthCheck{
+			name:   "MasterNodesReady",
+			client: client,
+			f:      masterNodesReady,
 		},
 		// TODO: Add a check for ComponentStatuses here?
 	}
 
 	// Run slightly different health checks depending on control plane hosting type
 	if IsControlPlaneSelfHosted(client) {
-		healthChecks = append(healthChecks, healthCheck{
-			description: "Control plane DaemonSet health",
-			okMessage:   "All control plane DaemonSets are healthy",
-			failMessage: "More than one control plane DaemonSet unhealthy",
-			f:           controlPlaneHealth,
+		healthChecks = append(healthChecks, &healthCheck{
+			name:   "ControlPlaneHealth",
+			client: client,
+			f:      controlPlaneHealth,
 		})
 	} else {
-		healthChecks = append(healthChecks, healthCheck{
-			description: "Static Pod manifests exists on disk",
-			okMessage:   "All manifests exist on disk",
-			failMessage: "Some manifests don't exist on disk",
-			f:           staticPodManifestHealth,
+		healthChecks = append(healthChecks, &healthCheck{
+			name:   "StaticPodManifest",
+			client: client,
+			f:      staticPodManifestHealth,
 		})
 	}
 
-	return runHealthChecks(client, healthChecks)
-}
-
-// runHealthChecks runs a set of health checks against the cluster
-func runHealthChecks(client clientset.Interface, healthChecks []healthCheck) error {
-	for _, check := range healthChecks {
-
-		err := check.f(client)
-		if err != nil {
-			fmt.Printf("[upgrade/health] Checking %s: %s\n", check.description, check.failMessage)
-			return fmt.Errorf("The cluster is not in an upgradeable state due to: %v", err)
-		}
-		fmt.Printf("[upgrade/health] Checking %s: %s\n", check.description, check.okMessage)
-	}
-	return nil
+	return preflight.RunChecks(healthChecks, os.Stderr, ignoreChecksErrors)
 }
 
 // apiServerHealthy checks whether the API server's /healthz endpoint is healthy
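The return statement above hands the assembled checks to preflight.RunChecks together with the ignore set, replacing the deleted fail-fast runHealthChecks loop. A rough sketch of the semantics this relies on; kubeadm's actual RunChecks differs in detail, and the lower-casing and "all" wildcard are assumptions about its matching behavior:

package main

import (
	"fmt"
	"io"
	"strings"

	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/sets"
)

// Checker mirrors the shape of kubeadm's preflight.Checker interface.
type Checker interface {
	Check() (warnings, errors []error)
	Name() string
}

// runChecks sketches the ignore semantics: a check whose lower-cased name
// (or the wildcard "all") is in the ignore set has its errors demoted to
// warnings, so the run as a whole can still succeed.
func runChecks(checks []Checker, w io.Writer, ignore sets.String) error {
	var errs []error
	for _, c := range checks {
		warnings, errors := c.Check()
		if ignore.Has("all") || ignore.Has(strings.ToLower(c.Name())) {
			warnings = append(warnings, errors...)
			errors = nil
		}
		for _, warning := range warnings {
			fmt.Fprintf(w, "\t[WARNING %s]: %v\n", c.Name(), warning)
		}
		errs = append(errs, errors...)
	}
	return utilerrors.NewAggregate(errs)
}
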
@@ -108,16 +107,25 @@ func apiServerHealthy(client clientset.Interface) error {
 	return nil
 }
 
-// nodesHealthy checks whether all Nodes in the cluster are in the Running state
-func nodesHealthy(client clientset.Interface) error {
-	nodes, err := client.CoreV1().Nodes().List(metav1.ListOptions{})
+// masterNodesReady checks whether all master Nodes in the cluster are in the Ready state
+func masterNodesReady(client clientset.Interface) error {
+	selector := labels.SelectorFromSet(labels.Set(map[string]string{
+		constants.LabelNodeRoleMaster: "",
+	}))
+	masters, err := client.CoreV1().Nodes().List(metav1.ListOptions{
+		LabelSelector: selector.String(),
+	})
 	if err != nil {
-		return fmt.Errorf("couldn't list all nodes in cluster: %v", err)
+		return fmt.Errorf("couldn't list masters in cluster: %v", err)
 	}
 
-	notReadyNodes := getNotReadyNodes(nodes.Items)
-	if len(notReadyNodes) != 0 {
-		return fmt.Errorf("there are NotReady Nodes in the cluster: %v", notReadyNodes)
+	if len(masters.Items) == 0 {
+		return fmt.Errorf("failed to find any nodes with master role")
+	}
+
+	notReadyMasters := getNotReadyNodes(masters.Items)
+	if len(notReadyMasters) != 0 {
+		return fmt.Errorf("there are NotReady masters in the cluster: %v", notReadyMasters)
 	}
 	return nil
 }
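masterNodesReady narrows the node list to those carrying the constants.LabelNodeRoleMaster label and reuses the existing getNotReadyNodes helper, which this diff neither touches nor shows. A sketch of that helper's presumed behavior, collecting the names of nodes whose NodeReady condition is not True:

package main

import "k8s.io/api/core/v1"

// getNotReadyNodes (sketch of the existing, unshown helper): returns the
// names of all nodes whose NodeReady condition is present but not True.
func getNotReadyNodes(nodes []v1.Node) []string {
	notReadyNodes := []string{}
	for _, node := range nodes {
		for _, condition := range node.Status.Conditions {
			if condition.Type == v1.NodeReady && condition.Status != v1.ConditionTrue {
				notReadyNodes = append(notReadyNodes, node.ObjectMeta.Name)
			}
		}
	}
	return notReadyNodes
}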