mirror of
https://github.com/rancher/rke.git
synced 2025-07-05 11:37:48 +00:00
Reconcile workerplane for NotReady control hosts
This commit is contained in:
parent
cd8271c976
commit
1ecf6effbf
@ -193,16 +193,32 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
|
||||
if len(notReadyHostNames) > 0 {
|
||||
// attempt upgrade on NotReady hosts without respecting max_unavailable_controlplane
|
||||
logrus.Infof("Attempting upgrade of controlplane components on following hosts in NotReady status: %v", strings.Join(notReadyHostNames, ","))
|
||||
services.RunControlPlane(ctx, notReadyHosts,
|
||||
err = services.RunControlPlane(ctx, notReadyHosts,
|
||||
c.LocalConnDialerFactory,
|
||||
c.PrivateRegistriesMap,
|
||||
cpNodePlanMap,
|
||||
c.UpdateWorkersOnly,
|
||||
c.SystemImages.Alpine,
|
||||
c.Certificates)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to upgrade controlplane components on NotReady hosts, error: %v", err)
|
||||
}
|
||||
err = services.RunWorkerPlane(ctx, notReadyHosts,
|
||||
c.LocalConnDialerFactory,
|
||||
c.PrivateRegistriesMap,
|
||||
cpNodePlanMap,
|
||||
c.Certificates,
|
||||
c.UpdateWorkersOnly,
|
||||
c.SystemImages.Alpine)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to upgrade worker components on NotReady hosts, error: %v", err)
|
||||
}
|
||||
// Calling CheckNodeReady will give the nodes some time to reach the Ready state
|
||||
for _, host := range notReadyHosts {
|
||||
services.CheckNodeReady(kubeClient, host, services.ControlRole)
|
||||
err = services.CheckNodeReady(kubeClient, host, services.ControlRole)
|
||||
if err != nil {
|
||||
logrus.Errorf("Host %v failed to report Ready status with error: %v", host.HostnameOverride, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
// rolling upgrade respecting maxUnavailable
|
||||
@ -294,16 +310,22 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
|
||||
if len(notReadyHostNames) > 0 {
|
||||
// attempt upgrade on NotReady hosts without respecting max_unavailable_worker
|
||||
logrus.Infof("Attempting upgrade of worker components on following hosts in NotReady status: %v", strings.Join(notReadyHostNames, ","))
|
||||
services.RunWorkerPlane(ctx, notReadyHosts,
|
||||
err = services.RunWorkerPlane(ctx, notReadyHosts,
|
||||
c.LocalConnDialerFactory,
|
||||
c.PrivateRegistriesMap,
|
||||
workerNodePlanMap,
|
||||
c.Certificates,
|
||||
c.UpdateWorkersOnly,
|
||||
c.SystemImages.Alpine)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to upgrade worker components on NotReady hosts, error: %v", err)
|
||||
}
|
||||
// Calling CheckNodeReady will give the nodes some time to reach the Ready state
|
||||
for _, host := range notReadyHosts {
|
||||
services.CheckNodeReady(kubeClient, host, services.WorkerRole)
|
||||
err = services.CheckNodeReady(kubeClient, host, services.WorkerRole)
|
||||
if err != nil {
|
||||
logrus.Errorf("Host %v failed to report Ready status with error: %v", host.HostnameOverride, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
|
||||
}
|
||||
var errMsgMaxUnavailableNotFailed string
|
||||
var drainHelper drain.Helper
|
||||
log.Infof(ctx, "[%s] Processing controlplane hosts for upgrade one at a time", ControlRole)
|
||||
log.Infof(ctx, "[%s] Processing controlplane hosts for upgrade %v at a time", ControlRole, maxUnavailable)
|
||||
if len(newHosts) > 0 {
|
||||
var nodes []string
|
||||
for _, host := range controlHosts {
|
||||
@ -163,6 +163,10 @@ func processControlPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.C
|
||||
}
|
||||
if !controlPlaneUpgradable && !workerPlaneUpgradable {
|
||||
log.Infof(ctx, "Upgrade not required for controlplane and worker components of host %v", runHost.HostnameOverride)
|
||||
if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, false); err != nil {
|
||||
// This node didn't undergo an upgrade, so RKE only logs a failure to uncordon it and does not count the node toward maxUnavailable
|
||||
logrus.Errorf("[controlplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err := upgradeControlHost(ctx, kubeClient, runHost, upgradeStrategy.Drain, drainHelper, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap, controlPlaneUpgradable, workerPlaneUpgradable); err != nil {
|
||||
|
@ -163,6 +163,10 @@ func processWorkerPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.Cl
|
||||
}
|
||||
if !upgradable {
|
||||
logrus.Infof("[workerplane] Upgrade not required for worker components of host %v", runHost.HostnameOverride)
|
||||
if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, false); err != nil {
|
||||
// This node didn't undergo an upgrade, so RKE only logs a failure to uncordon it and does not count the node toward maxUnavailable
|
||||
logrus.Errorf("[workerplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err := upgradeWorkerHost(ctx, kubeClient, runHost, upgradeStrategy.Drain, drainHelper, localConnDialerFactory, prsMap, workerNodePlanMap, certMap, updateWorkersOnly, alpineImage); err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user