mirror of
https://github.com/rancher/rke.git
synced 2025-08-29 19:53:12 +00:00
Merge pull request #1947 from mrajashree/bufixes
MaxUnavailable and powered off hosts comparison
This commit is contained in:
commit
47ba695d73
@ -166,6 +166,7 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
|
|||||||
inactiveHosts := make(map[string]bool)
|
inactiveHosts := make(map[string]bool)
|
||||||
var controlPlaneHosts, notReadyHosts []*hosts.Host
|
var controlPlaneHosts, notReadyHosts []*hosts.Host
|
||||||
var notReadyHostNames []string
|
var notReadyHostNames []string
|
||||||
|
var err error
|
||||||
|
|
||||||
for _, host := range c.InactiveHosts {
|
for _, host := range c.InactiveHosts {
|
||||||
// include only hosts with controlplane role
|
// include only hosts with controlplane role
|
||||||
@ -173,6 +174,10 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
|
|||||||
inactiveHosts[host.HostnameOverride] = true
|
inactiveHosts[host.HostnameOverride] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
c.MaxUnavailableForControlNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForControlNodes, len(inactiveHosts), services.ControlRole)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
for _, host := range c.ControlPlaneHosts {
|
for _, host := range c.ControlPlaneHosts {
|
||||||
if !c.HostsLabeledToIgnoreUpgrade[host.Address] {
|
if !c.HostsLabeledToIgnoreUpgrade[host.Address] {
|
||||||
controlPlaneHosts = append(controlPlaneHosts, host)
|
controlPlaneHosts = append(controlPlaneHosts, host)
|
||||||
@ -265,6 +270,7 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
|
|||||||
inactiveHosts := make(map[string]bool)
|
inactiveHosts := make(map[string]bool)
|
||||||
var notReadyHosts []*hosts.Host
|
var notReadyHosts []*hosts.Host
|
||||||
var notReadyHostNames []string
|
var notReadyHostNames []string
|
||||||
|
var err error
|
||||||
|
|
||||||
for _, host := range c.InactiveHosts {
|
for _, host := range c.InactiveHosts {
|
||||||
// if host has controlplane role, it already has worker components upgraded
|
// if host has controlplane role, it already has worker components upgraded
|
||||||
@ -272,6 +278,10 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
|
|||||||
inactiveHosts[host.HostnameOverride] = true
|
inactiveHosts[host.HostnameOverride] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
c.MaxUnavailableForWorkerNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForWorkerNodes, len(inactiveHosts), services.WorkerRole)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
for _, host := range append(etcdAndWorkerHosts, workerOnlyHosts...) {
|
for _, host := range append(etcdAndWorkerHosts, workerOnlyHosts...) {
|
||||||
if c.NewHosts[host.HostnameOverride] {
|
if c.NewHosts[host.HostnameOverride] {
|
||||||
continue
|
continue
|
||||||
|
@ -603,7 +603,7 @@ func GetExternalFlags(local, updateOnly, disablePortCheck bool, configDir, clust
|
|||||||
func (c *Cluster) setAddonsDefaults() {
|
func (c *Cluster) setAddonsDefaults() {
|
||||||
c.Ingress.UpdateStrategy = setDaemonsetAddonDefaults(c.Ingress.UpdateStrategy)
|
c.Ingress.UpdateStrategy = setDaemonsetAddonDefaults(c.Ingress.UpdateStrategy)
|
||||||
c.Network.UpdateStrategy = setDaemonsetAddonDefaults(c.Network.UpdateStrategy)
|
c.Network.UpdateStrategy = setDaemonsetAddonDefaults(c.Network.UpdateStrategy)
|
||||||
c.DNS.UpdateStrategy = setDeploymentAddonDefaults(c.DNS.UpdateStrategy)
|
c.DNS.UpdateStrategy = setDNSDeploymentAddonDefaults(c.DNS.UpdateStrategy, c.DNS.Provider)
|
||||||
if c.DNS.LinearAutoscalerParams == nil {
|
if c.DNS.LinearAutoscalerParams == nil {
|
||||||
c.DNS.LinearAutoscalerParams = &DefaultClusterProportionalAutoscalerLinearParams
|
c.DNS.LinearAutoscalerParams = &DefaultClusterProportionalAutoscalerLinearParams
|
||||||
}
|
}
|
||||||
@ -638,3 +638,43 @@ func setDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy) *apps
|
|||||||
}
|
}
|
||||||
return updateStrategy
|
return updateStrategy
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func setDNSDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy, dnsProvider string) *appsv1.DeploymentStrategy {
|
||||||
|
var (
|
||||||
|
coreDNSMaxUnavailable, coreDNSMaxSurge = intstr.FromInt(1), intstr.FromInt(0)
|
||||||
|
kubeDNSMaxSurge, kubeDNSMaxUnavailable = intstr.FromString("10%"), intstr.FromInt(0)
|
||||||
|
)
|
||||||
|
if updateStrategy != nil && updateStrategy.Type != appsv1.RollingUpdateDeploymentStrategyType {
|
||||||
|
return updateStrategy
|
||||||
|
}
|
||||||
|
switch dnsProvider {
|
||||||
|
case CoreDNSProvider:
|
||||||
|
if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
|
||||||
|
return &appsv1.DeploymentStrategy{
|
||||||
|
Type: appsv1.RollingUpdateDeploymentStrategyType,
|
||||||
|
RollingUpdate: &appsv1.RollingUpdateDeployment{
|
||||||
|
MaxUnavailable: &coreDNSMaxUnavailable,
|
||||||
|
MaxSurge: &coreDNSMaxSurge,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if updateStrategy.RollingUpdate.MaxUnavailable == nil {
|
||||||
|
updateStrategy.RollingUpdate.MaxUnavailable = &coreDNSMaxUnavailable
|
||||||
|
}
|
||||||
|
case KubeDNSProvider:
|
||||||
|
if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
|
||||||
|
return &appsv1.DeploymentStrategy{
|
||||||
|
Type: appsv1.RollingUpdateDeploymentStrategyType,
|
||||||
|
RollingUpdate: &appsv1.RollingUpdateDeployment{
|
||||||
|
MaxUnavailable: &kubeDNSMaxUnavailable,
|
||||||
|
MaxSurge: &kubeDNSMaxSurge,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if updateStrategy.RollingUpdate.MaxSurge == nil {
|
||||||
|
updateStrategy.RollingUpdate.MaxSurge = &kubeDNSMaxSurge
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return updateStrategy
|
||||||
|
}
|
||||||
|
@ -72,16 +72,22 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
|
|||||||
drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
|
drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
|
||||||
log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
|
log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
|
||||||
}
|
}
|
||||||
maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), ControlRole)
|
var inactiveHostErr error
|
||||||
if err != nil {
|
if len(inactiveHosts) > 0 {
|
||||||
return errMsgMaxUnavailableNotFailed, err
|
var inactiveHostNames []string
|
||||||
|
for hostName := range inactiveHosts {
|
||||||
|
inactiveHostNames = append(inactiveHostNames, hostName)
|
||||||
|
}
|
||||||
|
inactiveHostErr = fmt.Errorf("provisioning incomplete, host(s) [%s] skipped because they could not be contacted", strings.Join(inactiveHostNames, ","))
|
||||||
}
|
}
|
||||||
hostsFailedToUpgrade, err := processControlPlaneForUpgrade(ctx, kubeClient, controlHosts, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap,
|
hostsFailedToUpgrade, err := processControlPlaneForUpgrade(ctx, kubeClient, controlHosts, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap,
|
||||||
upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
|
upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
|
||||||
if err != nil {
|
if err != nil || inactiveHostErr != nil {
|
||||||
logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
if len(hostsFailedToUpgrade) > 0 {
|
||||||
errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
||||||
return errMsgMaxUnavailableNotFailed, err
|
errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
||||||
|
}
|
||||||
|
return errMsgMaxUnavailableNotFailed, util.ErrList([]error{err, inactiveHostErr})
|
||||||
}
|
}
|
||||||
log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
|
log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
|
||||||
return errMsgMaxUnavailableNotFailed, nil
|
return errMsgMaxUnavailableNotFailed, nil
|
||||||
|
@ -115,7 +115,7 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int, role string
|
|||||||
return maxUnavailable, nil
|
return maxUnavailable, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
|
func ResetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
|
||||||
if maxUnavailable > WorkerThreads {
|
if maxUnavailable > WorkerThreads {
|
||||||
/* upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors regarding too many open sockets
|
/* upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors regarding too many open sockets
|
||||||
Because of this RKE switched to using workerpools. 50 workerthreads has been sufficient to optimize rke up, upgrading at most 50 nodes in parallel.
|
Because of this RKE switched to using workerpools. 50 workerthreads has been sufficient to optimize rke up, upgrading at most 50 nodes in parallel.
|
||||||
|
@ -55,10 +55,6 @@ func RunWorkerPlane(ctx context.Context, allHosts []*hosts.Host, localConnDialer
|
|||||||
func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *kubernetes.Clientset, mixedRolesHosts []*hosts.Host, workerOnlyHosts []*hosts.Host, inactiveHosts map[string]bool, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, workerNodePlanMap map[string]v3.RKEConfigNodePlan, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage string, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool, maxUnavailable int) (string, error) {
|
func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *kubernetes.Clientset, mixedRolesHosts []*hosts.Host, workerOnlyHosts []*hosts.Host, inactiveHosts map[string]bool, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, workerNodePlanMap map[string]v3.RKEConfigNodePlan, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage string, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool, maxUnavailable int) (string, error) {
|
||||||
log.Infof(ctx, "[%s] Upgrading Worker Plane..", WorkerRole)
|
log.Infof(ctx, "[%s] Upgrading Worker Plane..", WorkerRole)
|
||||||
var errMsgMaxUnavailableNotFailed string
|
var errMsgMaxUnavailableNotFailed string
|
||||||
maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), WorkerRole)
|
|
||||||
if err != nil {
|
|
||||||
return errMsgMaxUnavailableNotFailed, err
|
|
||||||
}
|
|
||||||
updateNewHostsList(kubeClient, append(mixedRolesHosts, workerOnlyHosts...), newHosts)
|
updateNewHostsList(kubeClient, append(mixedRolesHosts, workerOnlyHosts...), newHosts)
|
||||||
if len(mixedRolesHosts) > 0 {
|
if len(mixedRolesHosts) > 0 {
|
||||||
log.Infof(ctx, "First checking and processing worker components for upgrades on nodes with etcd role one at a time")
|
log.Infof(ctx, "First checking and processing worker components for upgrades on nodes with etcd role one at a time")
|
||||||
|
Loading…
Reference in New Issue
Block a user