mirror of https://github.com/rancher/rke.git

Fix maxunavailable reset

rajashree 2020-02-28 11:25:12 -08:00
parent 6e3d1f0a15
commit 115c11b864
3 changed files with 15 additions and 6 deletions

@@ -72,7 +72,10 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
 		drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
 		log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
 	}
-	maxUnavailable = resetMaxUnavailable(maxUnavailable, len(inactiveHosts))
+	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), ControlRole)
+	if err != nil {
+		return errMsgMaxUnavailableNotFailed, err
+	}
 	hostsFailedToUpgrade, err := processControlPlaneForUpgrade(ctx, kubeClient, controlHosts, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap,
 		upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
 	if err != nil {

@@ -104,18 +104,21 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error
 	return maxUnavailable, nil
 }
-func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int) int {
+func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
 	if maxUnavailable > WorkerThreads {
 		/* upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors regarding too many open sockets
 		Because of this RKE switched to using workerpools. 50 workerthreads has been sufficient to optimize rke up, upgrading at most 50 nodes in parallel.
 		So the user configurable maxUnavailable will be respected only as long as it's less than 50 and capped at 50 */
 		maxUnavailable = WorkerThreads
-		logrus.Info("Resetting maxUnavailable to 50, to avoid issues related to upgrading large number of nodes in parallel")
+		logrus.Infof("Resetting %s to 50, to avoid issues related to upgrading large number of nodes in parallel", "max_unavailable_"+component)
 	}
 	if lenInactiveHosts > 0 {
+		if maxUnavailable == lenInactiveHosts {
+			return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) are found to be inactive prior to upgrade", component, lenInactiveHosts)
+		}
 		maxUnavailable -= lenInactiveHosts
-		logrus.Infof("Resetting maxUnavailable to %v since %v host(s) are found to be inactive/unavailable prior to upgrade", maxUnavailable, lenInactiveHosts)
+		logrus.Infof("Resetting %s to %v since %v host(s) are found to be inactive prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
 	}
-	return maxUnavailable
+	return maxUnavailable, nil
 }
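
Taken together, the reworked resetMaxUnavailable caps the budget at the worker-pool size, subtracts inactive hosts, and now refuses to proceed when inactive hosts alone would use up the entire budget instead of silently shrinking it to zero. Below is a minimal standalone sketch of that behavior; the value 50 for WorkerThreads is taken from the comment and log message above, while the ControlRole and WorkerRole string values are placeholders assumed here for illustration and may not match the actual RKE constants.

package main

import (
	"fmt"

	"github.com/sirupsen/logrus"
)

// Mirrors the constants referenced in the diff; the role strings are
// illustrative assumptions, not the real RKE constant values.
const (
	WorkerThreads = 50
	ControlRole   = "control-plane"
	WorkerRole    = "worker"
)

// resetMaxUnavailable as changed by this commit: cap the budget at the worker
// pool size, fail if inactive hosts would consume the whole budget, otherwise
// subtract them and continue.
func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
	if maxUnavailable > WorkerThreads {
		maxUnavailable = WorkerThreads
		logrus.Infof("Resetting %s to 50, to avoid issues related to upgrading large number of nodes in parallel", "max_unavailable_"+component)
	}
	if lenInactiveHosts > 0 {
		if maxUnavailable == lenInactiveHosts {
			return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) are found to be inactive prior to upgrade", component, lenInactiveHosts)
		}
		maxUnavailable -= lenInactiveHosts
		logrus.Infof("Resetting %s to %v since %v host(s) are found to be inactive prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
	}
	return maxUnavailable, nil
}

func main() {
	// Budget of 3 with 1 inactive host: shrinks to 2, the upgrade proceeds.
	fmt.Println(resetMaxUnavailable(3, 1, WorkerRole))
	// Budget of 1 with 1 inactive host: the new guard aborts the upgrade.
	fmt.Println(resetMaxUnavailable(1, 1, ControlRole))
}

Returning an error rather than a zero budget lets both the control-plane and worker-plane callers abort the upgrade up front, as the surrounding hunks show.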

@@ -55,7 +55,10 @@ func RunWorkerPlane(ctx context.Context, allHosts []*hosts.Host, localConnDialer
 func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *kubernetes.Clientset, mixedRolesHosts []*hosts.Host, workerOnlyHosts []*hosts.Host, inactiveHosts map[string]bool, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, workerNodePlanMap map[string]v3.RKEConfigNodePlan, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage string, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool, maxUnavailable int) (string, error) {
 	log.Infof(ctx, "[%s] Upgrading Worker Plane..", WorkerRole)
 	var errMsgMaxUnavailableNotFailed string
-	maxUnavailable = resetMaxUnavailable(maxUnavailable, len(inactiveHosts))
+	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), WorkerRole)
+	if err != nil {
+		return errMsgMaxUnavailableNotFailed, err
+	}
 	updateNewHostsList(kubeClient, append(mixedRolesHosts, workerOnlyHosts...), newHosts)
 	if len(mixedRolesHosts) > 0 {
 		log.Infof(ctx, "First checking and processing worker components for upgrades on nodes with etcd role one at a time")
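
As a usage illustration with hypothetical numbers, run against the sketch above: suppose max_unavailable_worker resolves to 2 and 2 worker hosts are already inactive before the upgrade starts.

	// Hypothetical worker-plane numbers, using the resetMaxUnavailable sketch above.
	budget, err := resetMaxUnavailable(2, 2, WorkerRole)
	fmt.Println(budget, err)
	// Before this commit: the budget was silently reset to 0.
	// After this commit: budget == 0 and err reports
	// "cannot proceed with upgrade of worker since 2 host(s) are found to be inactive prior to upgrade",
	// which UpgradeWorkerPlaneForWorkerAndEtcdNodes now returns before upgrading any node.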