
Accept label to ignore nodes during upgrade

RKE does a cluster scan to find unreachable hosts; if that number is the same as
or exceeds maxUnavailable, the upgrade won't proceed.
This commit introduces a label users can provide for their nodes so those nodes
don't get counted as unavailable and are excluded from the upgrade.
This commit also includes a couple of bug fixes.
This commit is contained in:
rajashree
2020-02-14 09:40:23 -08:00
parent 92714e5523
commit 968a399f26
9 changed files with 197 additions and 77 deletions
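The change described in the commit message boils down to skipping labeled nodes when counting unavailable hosts against maxUnavailable. Below is a minimal, self-contained sketch of that filtering step; the label key and helper names here are illustrative assumptions, not the actual RKE constants or the functions changed in this commit.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// Hypothetical label key; the real key accepted by RKE may differ.
const ignoreUpgradeLabel = "example.com/ignore-during-upgrade"

// nodeReady checks the standard NodeReady condition on a node.
func nodeReady(node corev1.Node) bool {
	for _, cond := range node.Status.Conditions {
		if cond.Type == corev1.NodeReady {
			return cond.Status == corev1.ConditionTrue
		}
	}
	return false
}

// countUnavailable counts not-ready nodes, skipping nodes that carry the
// ignore label so they neither block the upgrade nor take part in it.
func countUnavailable(nodes []corev1.Node) int {
	unavailable := 0
	for _, node := range nodes {
		if _, ignored := node.Labels[ignoreUpgradeLabel]; ignored {
			continue
		}
		if !nodeReady(node) {
			unavailable++
		}
	}
	return unavailable
}

func main() {
	var nodes []corev1.Node // in RKE this would come from a node list via the Kubernetes client
	maxUnavailable := 1
	if countUnavailable(nodes) >= maxUnavailable {
		fmt.Println("too many unavailable hosts, upgrade would not proceed")
	} else {
		fmt.Println("upgrade can proceed")
	}
}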

@@ -2,6 +2,7 @@ package services
 import (
 	"context"
+	"fmt"
 	"strings"
 	"sync"
@@ -47,13 +48,15 @@ func RunControlPlane(ctx context.Context, controlHosts []*hosts.Host, localConnD
 	return nil
 }
-func UpgradeControlPlane(ctx context.Context, kubeClient *kubernetes.Clientset, controlHosts []*hosts.Host, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, cpNodePlanMap map[string]v3.RKEConfigNodePlan, updateWorkersOnly bool, alpineImage string, certMap map[string]pki.CertificatePKI, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool) error {
+func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Clientset, controlHosts []*hosts.Host, localConnDialerFactory hosts.DialerFactory,
+	prsMap map[string]v3.PrivateRegistry, cpNodePlanMap map[string]v3.RKEConfigNodePlan, updateWorkersOnly bool, alpineImage string, certMap map[string]pki.CertificatePKI,
+	upgradeStrategy *v3.NodeUpgradeStrategy, newHosts, inactiveHosts map[string]bool) error {
 	if updateWorkersOnly {
 		return nil
 	}
 	var drainHelper drain.Helper
-	log.Infof(ctx, "[%s] Processing control plane components for upgrade one at a time", ControlRole)
+	log.Infof(ctx, "[%s] Processing controlplane hosts for upgrade one at a time", ControlRole)
 	if len(newHosts) > 0 {
 		var nodes []string
 		for _, host := range controlHosts {
@@ -67,6 +70,12 @@ func UpgradeControlPlane(ctx context.Context, kubeClient *kubernetes.Clientset,
 	}
 	if upgradeStrategy.Drain {
 		drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
+		log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
 	}
+	currentHostsPool := make(map[string]bool)
+	for _, host := range controlHosts {
+		currentHostsPool[host.HostnameOverride] = true
+	}
 	// upgrade control plane hosts one at a time for zero downtime upgrades
 	for _, host := range controlHosts {
@@ -75,16 +84,19 @@ func UpgradeControlPlane(ctx context.Context, kubeClient *kubernetes.Clientset,
 			if err := doDeployControlHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, alpineImage, certMap); err != nil {
 				return err
 			}
+			if err := doDeployWorkerPlaneHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, certMap, updateWorkersOnly, alpineImage); err != nil {
+				return err
+			}
 			continue
 		}
-		nodes, err := getNodeListForUpgrade(kubeClient, &sync.Map{}, newHosts, false)
+		nodes, err := getNodeListForUpgrade(kubeClient, &sync.Map{}, newHosts, inactiveHosts)
 		if err != nil {
 			return err
 		}
 		var maxUnavailableHit bool
 		for _, node := range nodes {
 			// in case any previously added nodes or till now unprocessed nodes become unreachable during upgrade
-			if !k8s.IsNodeReady(node) {
+			if !k8s.IsNodeReady(node) && currentHostsPool[node.Labels[k8s.HostnameLabel]] {
 				maxUnavailableHit = true
 				break
 			}
@@ -93,27 +105,19 @@ func UpgradeControlPlane(ctx context.Context, kubeClient *kubernetes.Clientset,
 			return err
 		}
-		upgradable, err := isControlPlaneHostUpgradable(ctx, host, cpNodePlanMap[host.Address].Processes)
+		controlPlaneUpgradable, err := isControlPlaneHostUpgradable(ctx, host, cpNodePlanMap[host.Address].Processes)
 		if err != nil {
 			return err
 		}
-		if !upgradable {
-			log.Infof(ctx, "Upgrade not required for controlplane components of host %v", host.HostnameOverride)
+		workerPlaneUpgradable, err := isWorkerHostUpgradable(ctx, host, cpNodePlanMap[host.Address].Processes)
+		if err != nil {
+			return err
+		}
+		if !controlPlaneUpgradable && !workerPlaneUpgradable {
+			log.Infof(ctx, "Upgrade not required for controlplane and worker components of host %v", host.HostnameOverride)
 			continue
 		}
-		if err := checkNodeReady(kubeClient, host, ControlRole); err != nil {
-			return err
-		}
-		if err := cordonAndDrainNode(kubeClient, host, upgradeStrategy.Drain, drainHelper, ControlRole); err != nil {
-			return err
-		}
-		if err := doDeployControlHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, alpineImage, certMap); err != nil {
-			return err
-		}
-		if err := checkNodeReady(kubeClient, host, ControlRole); err != nil {
-			return err
-		}
-		if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, false); err != nil {
+		if err := upgradeControlHost(ctx, kubeClient, host, upgradeStrategy.Drain, drainHelper, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap, controlPlaneUpgradable, workerPlaneUpgradable); err != nil {
 			return err
 		}
 	}
@@ -121,6 +125,37 @@ func UpgradeControlPlane(ctx context.Context, kubeClient *kubernetes.Clientset,
 	return nil
 }
+func upgradeControlHost(ctx context.Context, kubeClient *kubernetes.Clientset, host *hosts.Host, drain bool, drainHelper drain.Helper,
+	localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, cpNodePlanMap map[string]v3.RKEConfigNodePlan, updateWorkersOnly bool,
+	alpineImage string, certMap map[string]pki.CertificatePKI, controlPlaneUpgradable, workerPlaneUpgradable bool) error {
+	if err := checkNodeReady(kubeClient, host, ControlRole); err != nil {
+		return err
+	}
+	if err := cordonAndDrainNode(kubeClient, host, drain, drainHelper, ControlRole); err != nil {
+		return err
+	}
+	if controlPlaneUpgradable {
+		log.Infof(ctx, "Upgrading controlplane components for control host %v", host.HostnameOverride)
+		if err := doDeployControlHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, alpineImage, certMap); err != nil {
+			return err
+		}
+	}
+	if workerPlaneUpgradable {
+		log.Infof(ctx, "Upgrading workerplane components for control host %v", host.HostnameOverride)
+		if err := doDeployWorkerPlaneHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, certMap, updateWorkersOnly, alpineImage); err != nil {
+			return err
+		}
+	}
+	if err := checkNodeReady(kubeClient, host, ControlRole); err != nil {
+		return err
+	}
+	if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, false); err != nil {
+		return err
+	}
+	return nil
+}
 func RemoveControlPlane(ctx context.Context, controlHosts []*hosts.Host, force bool) error {
 	log.Infof(ctx, "[%s] Tearing down the Controller Plane..", ControlRole)
 	var errgrp errgroup.Group