Mirror of https://github.com/rancher/rke.git, synced 2025-04-27 19:25:44 +00:00
Fix maxunavailable reset

commit 115c11b864
parent 6e3d1f0a15
@@ -72,7 +72,10 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
 		drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
 		log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
 	}
-	maxUnavailable = resetMaxUnavailable(maxUnavailable, len(inactiveHosts))
+	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), ControlRole)
+	if err != nil {
+		return errMsgMaxUnavailableNotFailed, err
+	}
 	hostsFailedToUpgrade, err := processControlPlaneForUpgrade(ctx, kubeClient, controlHosts, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap,
 		upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
 	if err != nil {
@@ -104,18 +104,21 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error
 	return maxUnavailable, nil
 }
 
-func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int) int {
+func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
 	if maxUnavailable > WorkerThreads {
 		/* upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors regarding too many open sockets
 		Because of this RKE switched to using workerpools. 50 workerthreads has been sufficient to optimize rke up, upgrading at most 50 nodes in parallel.
 		So the user configurable maxUnavailable will be respected only as long as it's less than 50 and capped at 50 */
 		maxUnavailable = WorkerThreads
-		logrus.Info("Resetting maxUnavailable to 50, to avoid issues related to upgrading large number of nodes in parallel")
+		logrus.Infof("Resetting %s to 50, to avoid issues related to upgrading large number of nodes in parallel", "max_unavailable_"+component)
 	}
 
 	if lenInactiveHosts > 0 {
+		if maxUnavailable == lenInactiveHosts {
+			return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) are found to be inactive prior to upgrade", component, lenInactiveHosts)
+		}
 		maxUnavailable -= lenInactiveHosts
-		logrus.Infof("Resetting maxUnavailable to %v since %v host(s) are found to be inactive/unavailable prior to upgrade", maxUnavailable, lenInactiveHosts)
+		logrus.Infof("Resetting %s to %v since %v host(s) are found to be inactive prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
 	}
-	return maxUnavailable
+	return maxUnavailable, nil
 }
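
The edge case this hunk fixes is easiest to see with concrete values. Below is a minimal standalone sketch (not the RKE source; WorkerThreads is assumed to be 50, matching the in-code comment, and the logging is dropped) of the new cap-and-subtract behaviour:

package main

import "fmt"

// WorkerThreads stands in for RKE's worker pool size; the diff's comment states it is 50.
const WorkerThreads = 50

// resetMaxUnavailable reproduces the new logic from the hunk above, minus logging.
func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
	if maxUnavailable > WorkerThreads {
		// Cap at the worker pool size to avoid too many parallel upgrades.
		maxUnavailable = WorkerThreads
	}
	if lenInactiveHosts > 0 {
		if maxUnavailable == lenInactiveHosts {
			// New behaviour: refuse the upgrade instead of silently ending up with 0.
			return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) are found to be inactive prior to upgrade", component, lenInactiveHosts)
		}
		maxUnavailable -= lenInactiveHosts
	}
	return maxUnavailable, nil
}

func main() {
	// 80 requested -> capped to 50, minus 2 inactive hosts -> 48 nodes upgraded in parallel.
	fmt.Println(resetMaxUnavailable(80, 2, "worker")) // 48 <nil>
	// All tolerated unavailability is already used up by inactive hosts: before this
	// commit the value silently became 0; now an error aborts the upgrade up front.
	fmt.Println(resetMaxUnavailable(3, 3, "controlplane")) // 0 cannot proceed with upgrade of controlplane ...
}
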
@@ -55,7 +55,10 @@ func RunWorkerPlane(ctx context.Context, allHosts []*hosts.Host, localConnDialer
 func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *kubernetes.Clientset, mixedRolesHosts []*hosts.Host, workerOnlyHosts []*hosts.Host, inactiveHosts map[string]bool, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, workerNodePlanMap map[string]v3.RKEConfigNodePlan, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage string, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool, maxUnavailable int) (string, error) {
 	log.Infof(ctx, "[%s] Upgrading Worker Plane..", WorkerRole)
 	var errMsgMaxUnavailableNotFailed string
-	maxUnavailable = resetMaxUnavailable(maxUnavailable, len(inactiveHosts))
+	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), WorkerRole)
+	if err != nil {
+		return errMsgMaxUnavailableNotFailed, err
+	}
 	updateNewHostsList(kubeClient, append(mixedRolesHosts, workerOnlyHosts...), newHosts)
 	if len(mixedRolesHosts) > 0 {
 		log.Infof(ctx, "First checking and processing worker components for upgrades on nodes with etcd role one at a time")
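
Both call sites now pass their role constant through as the component, so the messages name the specific upgrade_strategy field being adjusted rather than a generic "maxUnavailable". As an illustration only (a small sketch assuming ControlRole and WorkerRole resolve to "controlplane" and "worker", as the role names are used elsewhere in RKE):

package main

import "fmt"

const (
	ControlRole = "controlplane"
	WorkerRole  = "worker"
)

func main() {
	for _, component := range []string{ControlRole, WorkerRole} {
		// Mirrors the new log format from the diff, producing
		// "Resetting max_unavailable_controlplane ..." and "Resetting max_unavailable_worker ...".
		fmt.Printf("Resetting %s to 50, to avoid issues related to upgrading large number of nodes in parallel\n",
			"max_unavailable_"+component)
	}
}
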