
Merge pull request #1947 from mrajashree/bufixes

MaxUnavailable and powered off hosts comparison
Rajashree Mandaogane 2020-03-08 10:12:30 -07:00 committed by GitHub
commit 47ba695d73
5 changed files with 65 additions and 13 deletions
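
Summed up, both UpgradeControlPlane and UpgradeWorkerPlane now reset MaxUnavailable against the number of powered-off (inactive) hosts up front, via the newly exported services.ResetMaxUnavailable, instead of doing that reset inside the services package. A minimal standalone sketch of that comparison follows; only the function's signature and the WorkerThreads cap are visible in the hunks below, so the subtraction logic and the error message here are assumptions, and workerThreads = 50 is taken from the workerpool comment further down.

package main

import (
	"fmt"
	"log"
)

// workerThreads stands in for services.WorkerThreads; 50 per the workerpool
// comment in the diff (assumption for this sketch).
const workerThreads = 50

// resetMaxUnavailable mimics the assumed behavior of services.ResetMaxUnavailable:
// cap the value at workerThreads, then subtract the powered-off hosts so they do
// not count toward the nodes the upgrade is allowed to take down.
func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
	if maxUnavailable > workerThreads {
		maxUnavailable = workerThreads
	}
	if lenInactiveHosts > 0 {
		maxUnavailable -= lenInactiveHosts
		if maxUnavailable <= 0 {
			// message is illustrative, not RKE's actual wording
			return 0, fmt.Errorf("cannot proceed with upgrade of %s: %d host(s) are powered off and max unavailable is exhausted", component, lenInactiveHosts)
		}
	}
	return maxUnavailable, nil
}

func main() {
	// maxUnavailable=3 with two powered-off workers leaves room to upgrade
	// only one reachable node at a time.
	remaining, err := resetMaxUnavailable(3, 2, "worker")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(remaining) // 1
}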

View File

@@ -166,6 +166,7 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
inactiveHosts := make(map[string]bool)
var controlPlaneHosts, notReadyHosts []*hosts.Host
var notReadyHostNames []string
var err error
for _, host := range c.InactiveHosts {
// include only hosts with controlplane role
@@ -173,6 +174,10 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
inactiveHosts[host.HostnameOverride] = true
}
}
c.MaxUnavailableForControlNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForControlNodes, len(inactiveHosts), services.ControlRole)
if err != nil {
return "", err
}
for _, host := range c.ControlPlaneHosts {
if !c.HostsLabeledToIgnoreUpgrade[host.Address] {
controlPlaneHosts = append(controlPlaneHosts, host)
@@ -265,6 +270,7 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
inactiveHosts := make(map[string]bool)
var notReadyHosts []*hosts.Host
var notReadyHostNames []string
var err error
for _, host := range c.InactiveHosts {
// if host has controlplane role, it already has worker components upgraded
@@ -272,6 +278,10 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
inactiveHosts[host.HostnameOverride] = true
}
}
c.MaxUnavailableForWorkerNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForWorkerNodes, len(inactiveHosts), services.WorkerRole)
if err != nil {
return "", err
}
for _, host := range append(etcdAndWorkerHosts, workerOnlyHosts...) {
if c.NewHosts[host.HostnameOverride] {
continue

View File

@@ -603,7 +603,7 @@ func GetExternalFlags(local, updateOnly, disablePortCheck bool, configDir, clust
func (c *Cluster) setAddonsDefaults() {
c.Ingress.UpdateStrategy = setDaemonsetAddonDefaults(c.Ingress.UpdateStrategy)
c.Network.UpdateStrategy = setDaemonsetAddonDefaults(c.Network.UpdateStrategy)
c.DNS.UpdateStrategy = setDeploymentAddonDefaults(c.DNS.UpdateStrategy)
c.DNS.UpdateStrategy = setDNSDeploymentAddonDefaults(c.DNS.UpdateStrategy, c.DNS.Provider)
if c.DNS.LinearAutoscalerParams == nil {
c.DNS.LinearAutoscalerParams = &DefaultClusterProportionalAutoscalerLinearParams
}
@@ -638,3 +638,43 @@ func setDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy) *apps
}
return updateStrategy
}
func setDNSDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy, dnsProvider string) *appsv1.DeploymentStrategy {
var (
coreDNSMaxUnavailable, coreDNSMaxSurge = intstr.FromInt(1), intstr.FromInt(0)
kubeDNSMaxSurge, kubeDNSMaxUnavailable = intstr.FromString("10%"), intstr.FromInt(0)
)
if updateStrategy != nil && updateStrategy.Type != appsv1.RollingUpdateDeploymentStrategyType {
return updateStrategy
}
switch dnsProvider {
case CoreDNSProvider:
if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
return &appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{
MaxUnavailable: &coreDNSMaxUnavailable,
MaxSurge: &coreDNSMaxSurge,
},
}
}
if updateStrategy.RollingUpdate.MaxUnavailable == nil {
updateStrategy.RollingUpdate.MaxUnavailable = &coreDNSMaxUnavailable
}
case KubeDNSProvider:
if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
return &appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{
MaxUnavailable: &kubeDNSMaxUnavailable,
MaxSurge: &kubeDNSMaxSurge,
},
}
}
if updateStrategy.RollingUpdate.MaxSurge == nil {
updateStrategy.RollingUpdate.MaxSurge = &kubeDNSMaxSurge
}
}
return updateStrategy
}
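
For reference, these are the defaults the new provider-aware function hands back when cluster.yml sets no update strategy; the values come straight from the hunk above, and the small program wrapped around them is illustration only.

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	coreDNSMaxUnavailable, coreDNSMaxSurge := intstr.FromInt(1), intstr.FromInt(0)
	kubeDNSMaxSurge, kubeDNSMaxUnavailable := intstr.FromString("10%"), intstr.FromInt(0)

	// CoreDNS: at most one replica down at a time, no surge.
	coreDNS := appsv1.DeploymentStrategy{
		Type: appsv1.RollingUpdateDeploymentStrategyType,
		RollingUpdate: &appsv1.RollingUpdateDeployment{
			MaxUnavailable: &coreDNSMaxUnavailable,
			MaxSurge:       &coreDNSMaxSurge,
		},
	}
	// kube-dns: no replica taken down, up to 10% extra pods surged instead.
	kubeDNS := appsv1.DeploymentStrategy{
		Type: appsv1.RollingUpdateDeploymentStrategyType,
		RollingUpdate: &appsv1.RollingUpdateDeployment{
			MaxUnavailable: &kubeDNSMaxUnavailable,
			MaxSurge:       &kubeDNSMaxSurge,
		},
	}

	fmt.Printf("CoreDNS:  maxUnavailable=%s maxSurge=%s\n",
		coreDNS.RollingUpdate.MaxUnavailable.String(), coreDNS.RollingUpdate.MaxSurge.String())
	fmt.Printf("kube-dns: maxUnavailable=%s maxSurge=%s\n",
		kubeDNS.RollingUpdate.MaxUnavailable.String(), kubeDNS.RollingUpdate.MaxSurge.String())
}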

View File

@@ -72,16 +72,22 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
}
maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), ControlRole)
if err != nil {
return errMsgMaxUnavailableNotFailed, err
var inactiveHostErr error
if len(inactiveHosts) > 0 {
var inactiveHostNames []string
for hostName := range inactiveHosts {
inactiveHostNames = append(inactiveHostNames, hostName)
}
inactiveHostErr = fmt.Errorf("provisioning incomplete, host(s) [%s] skipped because they could not be contacted", strings.Join(inactiveHostNames, ","))
}
hostsFailedToUpgrade, err := processControlPlaneForUpgrade(ctx, kubeClient, controlHosts, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap,
upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
if err != nil {
logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
return errMsgMaxUnavailableNotFailed, err
if err != nil || inactiveHostErr != nil {
if len(hostsFailedToUpgrade) > 0 {
logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
}
return errMsgMaxUnavailableNotFailed, util.ErrList([]error{err, inactiveHostErr})
}
log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
return errMsgMaxUnavailableNotFailed, nil
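
The net effect of the inactiveHostErr handling above: an upgrade failure and the "hosts skipped because they could not be contacted" condition now surface together instead of the latter being dropped. util.ErrList itself is not part of this diff, so the hand-rolled join below is only an assumed stand-in for it (nil when every error is nil, otherwise one combined error).

package main

import (
	"errors"
	"fmt"
	"strings"
)

// errList mimics the assumed behavior of util.ErrList: return nil when every
// error is nil, otherwise join the non-nil messages into a single error.
func errList(errs []error) error {
	var msgs []string
	for _, e := range errs {
		if e != nil {
			msgs = append(msgs, e.Error())
		}
	}
	if len(msgs) == 0 {
		return nil
	}
	return errors.New(strings.Join(msgs, "; "))
}

func main() {
	upgradeErr := errors.New("failed to upgrade hosts: node-3")
	inactiveHostErr := fmt.Errorf("provisioning incomplete, host(s) [%s] skipped because they could not be contacted", "node-7")
	// Both conditions are reported in one error.
	fmt.Println(errList([]error{upgradeErr, inactiveHostErr}))
}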

View File

@@ -115,7 +115,7 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int, role string
return maxUnavailable, nil
}
func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
func ResetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
if maxUnavailable > WorkerThreads {
/* upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors regarding too many open sockets
Because of this RKE switched to using workerpools. 50 workerthreads has been sufficient to optimize rke up, upgrading at most 50 nodes in parallel.
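
That comment is why the cap exists: one goroutine per node meant one open connection per node, and large clusters hit "too many open sockets". A generic bounded worker-pool sketch, not RKE's actual implementation, showing how a fixed worker count keeps concurrent node upgrades at or below the cap:

package main

import (
	"fmt"
	"sync"
)

// upgradeNode stands in for the per-node upgrade work; in RKE this is where
// cordon/drain and container redeployment would happen.
func upgradeNode(name string) error {
	fmt.Println("upgrading", name)
	return nil
}

func main() {
	nodes := []string{"node-1", "node-2", "node-3", "node-4", "node-5"}
	const maxWorkers = 2 // analogous to capping maxUnavailable at WorkerThreads

	jobs := make(chan string)
	var wg sync.WaitGroup
	for i := 0; i < maxWorkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Each worker pulls nodes off the channel, so at most maxWorkers
			// upgrades (and connections) are in flight at once.
			for n := range jobs {
				if err := upgradeNode(n); err != nil {
					fmt.Println("failed:", n, err)
				}
			}
		}()
	}
	for _, n := range nodes {
		jobs <- n
	}
	close(jobs)
	wg.Wait()
}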

View File

@@ -55,10 +55,6 @@ func RunWorkerPlane(ctx context.Context, allHosts []*hosts.Host, localConnDialer
func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *kubernetes.Clientset, mixedRolesHosts []*hosts.Host, workerOnlyHosts []*hosts.Host, inactiveHosts map[string]bool, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, workerNodePlanMap map[string]v3.RKEConfigNodePlan, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage string, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool, maxUnavailable int) (string, error) {
log.Infof(ctx, "[%s] Upgrading Worker Plane..", WorkerRole)
var errMsgMaxUnavailableNotFailed string
maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), WorkerRole)
if err != nil {
return errMsgMaxUnavailableNotFailed, err
}
updateNewHostsList(kubeClient, append(mixedRolesHosts, workerOnlyHosts...), newHosts)
if len(mixedRolesHosts) > 0 {
log.Infof(ctx, "First checking and processing worker components for upgrades on nodes with etcd role one at a time")