Mirror of https://github.com/rancher/rke.git, synced 2025-08-28 19:31:04 +00:00
Merge pull request #1947 from mrajashree/bufixes
MaxUnavailable and powered off hosts comparison
This commit is contained in: commit 47ba695d73
@@ -166,6 +166,7 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
 	inactiveHosts := make(map[string]bool)
 	var controlPlaneHosts, notReadyHosts []*hosts.Host
 	var notReadyHostNames []string
+	var err error

 	for _, host := range c.InactiveHosts {
 		// include only hosts with controlplane role
@@ -173,6 +174,10 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
 			inactiveHosts[host.HostnameOverride] = true
 		}
 	}
+	c.MaxUnavailableForControlNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForControlNodes, len(inactiveHosts), services.ControlRole)
+	if err != nil {
+		return "", err
+	}
 	for _, host := range c.ControlPlaneHosts {
 		if !c.HostsLabeledToIgnoreUpgrade[host.Address] {
 			controlPlaneHosts = append(controlPlaneHosts, host)
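The condition elided between the two hunks above applies the role filter named in the comment. A hypothetical sketch of that filter, for illustration only; the IsControl field and the HostsLabeledToIgnoreUpgrade check are assumptions based on identifiers visible elsewhere in this diff, not lines taken from the commit:

// Hypothetical sketch of the elided control-plane filter; not verbatim from the commit.
for _, host := range c.InactiveHosts {
	// include only hosts with controlplane role
	if host.IsControl && !c.HostsLabeledToIgnoreUpgrade[host.Address] {
		// Count the powered-off host so ResetMaxUnavailable can subtract it
		// from the allowed-unavailable budget before the upgrade starts.
		inactiveHosts[host.HostnameOverride] = true
	}
}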
@@ -265,6 +270,7 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
 	inactiveHosts := make(map[string]bool)
 	var notReadyHosts []*hosts.Host
 	var notReadyHostNames []string
+	var err error

 	for _, host := range c.InactiveHosts {
 		// if host has controlplane role, it already has worker components upgraded
@@ -272,6 +278,10 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
 			inactiveHosts[host.HostnameOverride] = true
 		}
 	}
+	c.MaxUnavailableForWorkerNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForWorkerNodes, len(inactiveHosts), services.WorkerRole)
+	if err != nil {
+		return "", err
+	}
 	for _, host := range append(etcdAndWorkerHosts, workerOnlyHosts...) {
 		if c.NewHosts[host.HostnameOverride] {
 			continue
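The worker-plane loop mirrors the control-plane one, except that hosts which also hold the controlplane role are skipped, since their worker components were already upgraded during the control plane pass. A hypothetical sketch of the elided filter, with the same caveat that the field names are assumptions:

// Hypothetical sketch of the elided worker-side filter; not verbatim from the commit.
for _, host := range c.InactiveHosts {
	// if host has controlplane role, it already has worker components upgraded
	if host.IsControl || c.HostsLabeledToIgnoreUpgrade[host.Address] {
		continue
	}
	inactiveHosts[host.HostnameOverride] = true
}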
@@ -603,7 +603,7 @@ func GetExternalFlags(local, updateOnly, disablePortCheck bool, configDir, clust
 func (c *Cluster) setAddonsDefaults() {
 	c.Ingress.UpdateStrategy = setDaemonsetAddonDefaults(c.Ingress.UpdateStrategy)
 	c.Network.UpdateStrategy = setDaemonsetAddonDefaults(c.Network.UpdateStrategy)
-	c.DNS.UpdateStrategy = setDeploymentAddonDefaults(c.DNS.UpdateStrategy)
+	c.DNS.UpdateStrategy = setDNSDeploymentAddonDefaults(c.DNS.UpdateStrategy, c.DNS.Provider)
 	if c.DNS.LinearAutoscalerParams == nil {
 		c.DNS.LinearAutoscalerParams = &DefaultClusterProportionalAutoscalerLinearParams
 	}
@@ -638,3 +638,43 @@ func setDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy) *apps
 	}
 	return updateStrategy
 }
+
+func setDNSDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy, dnsProvider string) *appsv1.DeploymentStrategy {
+	var (
+		coreDNSMaxUnavailable, coreDNSMaxSurge = intstr.FromInt(1), intstr.FromInt(0)
+		kubeDNSMaxSurge, kubeDNSMaxUnavailable = intstr.FromString("10%"), intstr.FromInt(0)
+	)
+	if updateStrategy != nil && updateStrategy.Type != appsv1.RollingUpdateDeploymentStrategyType {
+		return updateStrategy
+	}
+	switch dnsProvider {
+	case CoreDNSProvider:
+		if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
+			return &appsv1.DeploymentStrategy{
+				Type: appsv1.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &appsv1.RollingUpdateDeployment{
+					MaxUnavailable: &coreDNSMaxUnavailable,
+					MaxSurge:       &coreDNSMaxSurge,
+				},
+			}
+		}
+		if updateStrategy.RollingUpdate.MaxUnavailable == nil {
+			updateStrategy.RollingUpdate.MaxUnavailable = &coreDNSMaxUnavailable
+		}
+	case KubeDNSProvider:
+		if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
+			return &appsv1.DeploymentStrategy{
+				Type: appsv1.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &appsv1.RollingUpdateDeployment{
+					MaxUnavailable: &kubeDNSMaxUnavailable,
+					MaxSurge:       &kubeDNSMaxSurge,
+				},
+			}
+		}
+		if updateStrategy.RollingUpdate.MaxSurge == nil {
+			updateStrategy.RollingUpdate.MaxSurge = &kubeDNSMaxSurge
+		}
+	}
+
+	return updateStrategy
+}
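To illustrate the defaulting behavior of the new helper, here is a sketch of an example-style test. It assumes it sits in the same package as the function above (so CoreDNSProvider and setDNSDeploymentAddonDefaults are in scope) and that the package is named cluster; it is not part of the commit:

// Sketch only: exercises setDNSDeploymentAddonDefaults with a strategy that
// sets MaxSurge but leaves MaxUnavailable nil. The package name is an assumption.
package cluster

import (
	"testing"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

func TestSetDNSDeploymentAddonDefaultsFillsMaxUnavailable(t *testing.T) {
	maxSurge := intstr.FromString("25%")
	strategy := &appsv1.DeploymentStrategy{
		Type: appsv1.RollingUpdateDeploymentStrategyType,
		RollingUpdate: &appsv1.RollingUpdateDeployment{
			MaxSurge: &maxSurge,
		},
	}
	got := setDNSDeploymentAddonDefaults(strategy, CoreDNSProvider)
	// The CoreDNS default of 1 should be filled in for the missing field.
	if got.RollingUpdate.MaxUnavailable == nil || got.RollingUpdate.MaxUnavailable.IntValue() != 1 {
		t.Fatalf("expected MaxUnavailable to default to 1, got %v", got.RollingUpdate.MaxUnavailable)
	}
	// The user-supplied MaxSurge should be preserved untouched.
	if got.RollingUpdate.MaxSurge.String() != "25%" {
		t.Fatalf("expected MaxSurge to stay 25%%, got %v", got.RollingUpdate.MaxSurge)
	}
}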
@@ -72,16 +72,22 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
 		drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
 		log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
 	}
-	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), ControlRole)
-	if err != nil {
-		return errMsgMaxUnavailableNotFailed, err
+	var inactiveHostErr error
+	if len(inactiveHosts) > 0 {
+		var inactiveHostNames []string
+		for hostName := range inactiveHosts {
+			inactiveHostNames = append(inactiveHostNames, hostName)
+		}
+		inactiveHostErr = fmt.Errorf("provisioning incomplete, host(s) [%s] skipped because they could not be contacted", strings.Join(inactiveHostNames, ","))
 	}
 	hostsFailedToUpgrade, err := processControlPlaneForUpgrade(ctx, kubeClient, controlHosts, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap,
 		upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
-	if err != nil {
-		logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
-		errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
-		return errMsgMaxUnavailableNotFailed, err
+	if err != nil || inactiveHostErr != nil {
+		if len(hostsFailedToUpgrade) > 0 {
+			logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
+			errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
+		}
+		return errMsgMaxUnavailableNotFailed, util.ErrList([]error{err, inactiveHostErr})
 	}
 	log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
 	return errMsgMaxUnavailableNotFailed, nil
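With this change the result is returned through util.ErrList even when only the unreachable-host error is set, so an upgrade that succeeds on the reachable nodes of a partially powered-off cluster still surfaces a non-nil error. A minimal sketch of the kind of nil-filtering aggregation such a helper performs; the actual implementation in RKE's util package is not shown in this diff and may differ:

// Sketch only: an ErrList-style aggregation that drops nil entries and joins
// the rest into a single error, returning nil when every entry was nil.
package util

import (
	"errors"
	"strings"
)

func errList(errs []error) error {
	var msgs []string
	for _, err := range errs {
		if err != nil {
			msgs = append(msgs, err.Error())
		}
	}
	if len(msgs) == 0 {
		return nil
	}
	return errors.New(strings.Join(msgs, ","))
}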
@@ -115,7 +115,7 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int, role string
 	return maxUnavailable, nil
 }

-func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
+func ResetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
 	if maxUnavailable > WorkerThreads {
 		/* upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors regarding too many open sockets
 		Because of this RKE switched to using workerpools. 50 workerthreads has been sufficient to optimize rke up, upgrading at most 50 nodes in parallel.
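Only the exported rename and the start of the WorkerThreads cap are visible in this hunk. A hedged sketch of the full reset logic, reconstructed from the PR title and the call sites above; it assumes it lives in the services package where WorkerThreads is defined and fmt is imported, and the exact arithmetic and error message are assumptions:

// Sketch only: approximate behavior of ResetMaxUnavailable, not verbatim from the commit.
func ResetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
	if maxUnavailable > WorkerThreads {
		// Upgrading too many nodes in parallel exhausts goroutines and sockets,
		// so parallelism is capped at the worker pool size.
		maxUnavailable = WorkerThreads
	}
	if lenInactiveHosts > 0 {
		// Powered-off or unreachable hosts already count as unavailable,
		// so they are subtracted from the budget before the upgrade begins.
		maxUnavailable -= lenInactiveHosts
		if maxUnavailable <= 0 {
			return 0, fmt.Errorf("cannot proceed with upgrade of %s components: %v host(s) are powered off or unreachable, leaving no unavailability budget", component, lenInactiveHosts)
		}
	}
	return maxUnavailable, nil
}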
@@ -55,10 +55,6 @@ func RunWorkerPlane(ctx context.Context, allHosts []*hosts.Host, localConnDialer
 func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *kubernetes.Clientset, mixedRolesHosts []*hosts.Host, workerOnlyHosts []*hosts.Host, inactiveHosts map[string]bool, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, workerNodePlanMap map[string]v3.RKEConfigNodePlan, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage string, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool, maxUnavailable int) (string, error) {
 	log.Infof(ctx, "[%s] Upgrading Worker Plane..", WorkerRole)
 	var errMsgMaxUnavailableNotFailed string
-	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), WorkerRole)
-	if err != nil {
-		return errMsgMaxUnavailableNotFailed, err
-	}
 	updateNewHostsList(kubeClient, append(mixedRolesHosts, workerOnlyHosts...), newHosts)
 	if len(mixedRolesHosts) > 0 {
 		log.Infof(ctx, "First checking and processing worker components for upgrades on nodes with etcd role one at a time")