
Reconcile workerplane for NotReady control hosts

Author: rajashree, 2020-03-20 11:11:59 -07:00
Parent: cd8271c976
Commit: 1ecf6effbf
3 changed files with 35 additions and 5 deletions


@@ -193,16 +193,32 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
 	if len(notReadyHostNames) > 0 {
 		// attempt upgrade on NotReady hosts without respecting max_unavailable_controlplane
 		logrus.Infof("Attempting upgrade of controlplane components on following hosts in NotReady status: %v", strings.Join(notReadyHostNames, ","))
-		services.RunControlPlane(ctx, notReadyHosts,
+		err = services.RunControlPlane(ctx, notReadyHosts,
 			c.LocalConnDialerFactory,
 			c.PrivateRegistriesMap,
 			cpNodePlanMap,
 			c.UpdateWorkersOnly,
 			c.SystemImages.Alpine,
 			c.Certificates)
+		if err != nil {
+			logrus.Errorf("Failed to upgrade controlplane components on NotReady hosts, error: %v", err)
+		}
+		err = services.RunWorkerPlane(ctx, notReadyHosts,
+			c.LocalConnDialerFactory,
+			c.PrivateRegistriesMap,
+			cpNodePlanMap,
+			c.Certificates,
+			c.UpdateWorkersOnly,
+			c.SystemImages.Alpine)
+		if err != nil {
+			logrus.Errorf("Failed to upgrade worker components on NotReady hosts, error: %v", err)
+		}
 		// Calling CheckNodeReady will give some time for nodes to get in Ready state
 		for _, host := range notReadyHosts {
-			services.CheckNodeReady(kubeClient, host, services.ControlRole)
+			err = services.CheckNodeReady(kubeClient, host, services.ControlRole)
+			if err != nil {
+				logrus.Errorf("Host %v failed to report Ready status with error: %v", host.HostnameOverride, err)
+			}
 		}
 	}
 	// rolling upgrade respecting maxUnavailable
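
Taken together, the hunk above makes the NotReady path best effort: controlplane and now also worker components are reconciled on NotReady control hosts, failures are logged rather than returned, and each host then gets time to reach Ready. A minimal, self-contained Go sketch of that shape follows; Host, runControlPlane, runWorkerPlane, and checkNodeReady are hypothetical stand-ins, not the rke signatures.

// Sketch of the "best effort on NotReady hosts" pattern from the hunk above.
// All names below are hypothetical stand-ins for the rke services.
package main

import (
	"context"
	"errors"
	"log"
)

type Host struct{ HostnameOverride string }

func runControlPlane(ctx context.Context, hosts []Host) error { return nil }
func runWorkerPlane(ctx context.Context, hosts []Host) error {
	return errors.New("kubelet not up yet")
}
func checkNodeReady(h Host) error { return nil }

func reconcileNotReady(ctx context.Context, notReadyHosts []Host) {
	// Errors are logged, not returned: a NotReady host must not abort the upgrade.
	if err := runControlPlane(ctx, notReadyHosts); err != nil {
		log.Printf("failed to upgrade controlplane components on NotReady hosts: %v", err)
	}
	// New in this commit: also reconcile worker components on NotReady control hosts.
	if err := runWorkerPlane(ctx, notReadyHosts); err != nil {
		log.Printf("failed to upgrade worker components on NotReady hosts: %v", err)
	}
	// Give each host some time to reach Ready before the rolling upgrade starts.
	for _, h := range notReadyHosts {
		if err := checkNodeReady(h); err != nil {
			log.Printf("host %v failed to report Ready status: %v", h.HostnameOverride, err)
		}
	}
}

func main() {
	reconcileNotReady(context.Background(), []Host{{HostnameOverride: "cp-0"}})
}

Logging instead of returning here keeps a permanently broken host from blocking the rolling upgrade of the healthy ones.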
@@ -294,16 +310,22 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
 	if len(notReadyHostNames) > 0 {
 		// attempt upgrade on NotReady hosts without respecting max_unavailable_worker
 		logrus.Infof("Attempting upgrade of worker components on following hosts in NotReady status: %v", strings.Join(notReadyHostNames, ","))
-		services.RunWorkerPlane(ctx, notReadyHosts,
+		err = services.RunWorkerPlane(ctx, notReadyHosts,
 			c.LocalConnDialerFactory,
 			c.PrivateRegistriesMap,
 			workerNodePlanMap,
 			c.Certificates,
 			c.UpdateWorkersOnly,
 			c.SystemImages.Alpine)
+		if err != nil {
+			logrus.Errorf("Failed to upgrade worker components on NotReady hosts, error: %v", err)
+		}
 		// Calling CheckNodeReady will give some time for nodes to get in Ready state
 		for _, host := range notReadyHosts {
-			services.CheckNodeReady(kubeClient, host, services.WorkerRole)
+			err = services.CheckNodeReady(kubeClient, host, services.WorkerRole)
+			if err != nil {
+				logrus.Errorf("Host %v failed to report Ready status with error: %v", host.HostnameOverride, err)
+			}
 		}
 	}
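
CheckNodeReady itself is not part of this diff; the comments above only say that calling it gives nodes time to reach Ready. A plausible poll-and-retry sketch of such a check is below; waitForNodeReady and nodeIsReady are hypothetical, not the rke implementation.

// Hypothetical readiness poll: retry a status check at a fixed interval
// until the node reports Ready or the attempts run out.
package main

import (
	"fmt"
	"time"
)

func nodeIsReady(name string) bool { return true } // stand-in for a real k8s node-status check

func waitForNodeReady(name string, attempts int, interval time.Duration) error {
	for i := 0; i < attempts; i++ {
		if nodeIsReady(name) {
			return nil
		}
		time.Sleep(interval)
	}
	return fmt.Errorf("node %s not ready after %d attempts", name, attempts)
}

func main() {
	if err := waitForNodeReady("worker-0", 5, 2*time.Second); err != nil {
		fmt.Println(err)
	}
}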


@@ -56,7 +56,7 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
 	}
 	var errMsgMaxUnavailableNotFailed string
 	var drainHelper drain.Helper
-	log.Infof(ctx, "[%s] Processing controlplane hosts for upgrade one at a time", ControlRole)
+	log.Infof(ctx, "[%s] Processing controlplane hosts for upgrade %v at a time", ControlRole, maxUnavailable)
 	if len(newHosts) > 0 {
 		var nodes []string
 		for _, host := range controlHosts {
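
The corrected log line reflects that control hosts are processed up to maxUnavailable at a time, not strictly one at a time. A generic sketch of that kind of batching, using a buffered channel as a semaphore, is below; upgradeHost is a hypothetical stand-in, and the actual rke scheduling is not shown in this diff.

// Process hosts "maxUnavailable at a time": the buffered channel caps
// how many upgrades are in flight concurrently.
package main

import (
	"fmt"
	"sync"
)

func upgradeHost(name string) error { return nil } // hypothetical stand-in

func upgradeInBatches(hosts []string, maxUnavailable int) {
	sem := make(chan struct{}, maxUnavailable) // at most maxUnavailable in flight
	var wg sync.WaitGroup
	for _, h := range hosts {
		wg.Add(1)
		sem <- struct{}{} // blocks once maxUnavailable upgrades are running
		go func(name string) {
			defer wg.Done()
			defer func() { <-sem }()
			if err := upgradeHost(name); err != nil {
				fmt.Printf("host %s failed: %v\n", name, err)
			}
		}(h)
	}
	wg.Wait()
}

func main() { upgradeInBatches([]string{"cp-0", "cp-1", "cp-2"}, 2) }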
@@ -163,6 +163,10 @@ func processControlPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.C
 		}
 		if !controlPlaneUpgradable && !workerPlaneUpgradable {
 			log.Infof(ctx, "Upgrade not required for controlplane and worker components of host %v", runHost.HostnameOverride)
+			if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, false); err != nil {
+				// This node didn't undergo an upgrade, so RKE will only log any error after uncordoning it and won't count this in maxUnavailable
+				logrus.Errorf("[controlplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
+			}
 			continue
 		}
 		if err := upgradeControlHost(ctx, kubeClient, runHost, upgradeStrategy.Drain, drainHelper, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap, controlPlaneUpgradable, workerPlaneUpgradable); err != nil {
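
k8s.CordonUncordon is called in the hunk above (and again in the workerplane hunk below), but its body is not part of this diff. Assuming it simply toggles node.Spec.Unschedulable, a minimal client-go sketch of such a helper could look like the following; this is a hypothetical reconstruction, not the rke implementation.

// Hypothetical sketch of a cordon/uncordon helper in the spirit of
// k8s.CordonUncordon: toggle node.Spec.Unschedulable via client-go.
package k8s

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// CordonUncordon marks the node unschedulable (cordoned=true) or
// schedulable again (cordoned=false).
func CordonUncordon(client kubernetes.Interface, nodeName string, cordoned bool) error {
	node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	if node.Spec.Unschedulable == cordoned {
		return nil // already in the desired state
	}
	node.Spec.Unschedulable = cordoned
	_, err = client.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
	return err
}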


@@ -163,6 +163,10 @@ func processWorkerPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.Cl
 		}
 		if !upgradable {
 			logrus.Infof("[workerplane] Upgrade not required for worker components of host %v", runHost.HostnameOverride)
+			if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, false); err != nil {
+				// This node didn't undergo an upgrade, so RKE will only log any error after uncordoning it and won't count this in maxUnavailable
+				logrus.Errorf("[workerplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
+			}
 			continue
 		}
 		if err := upgradeWorkerHost(ctx, kubeClient, runHost, upgradeStrategy.Drain, drainHelper, localConnDialerFactory, prsMap, workerNodePlanMap, certMap, updateWorkersOnly, alpineImage); err != nil {
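
The workerplane hunk mirrors the controlplane change: hosts that need no upgrade are proactively uncordoned, and an uncordon failure is only logged so it does not count against maxUnavailable. A usage example for the hypothetical CordonUncordon sketch above, run against client-go's fake clientset so it needs no real cluster (it assumes the helper lives in the same package for the demo):

// Exercise the hypothetical CordonUncordon helper against a fake clientset.
package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"
)

func main() {
	// Start with a cordoned node, as a drained-but-not-upgraded host would be.
	client := fake.NewSimpleClientset(&corev1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "worker-0"},
		Spec:       corev1.NodeSpec{Unschedulable: true},
	})
	if err := CordonUncordon(client, "worker-0", false); err != nil {
		fmt.Println("uncordon failed:", err)
		return
	}
	node, _ := client.CoreV1().Nodes().Get(context.TODO(), "worker-0", metav1.GetOptions{})
	fmt.Println("unschedulable:", node.Spec.Unschedulable) // prints: unschedulable: false
}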