diff --git a/cluster/hosts.go b/cluster/hosts.go
index 58847740..24b1cec9 100644
--- a/cluster/hosts.go
+++ b/cluster/hosts.go
@@ -89,8 +89,9 @@ func (c *Cluster) FindHostsLabeledToIgnoreUpgrade(ctx context.Context) {
 		return
 	}
 	for _, node := range nodes.Items {
-		if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == "true" {
+		if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == k8s.IgnoreLabelValue {
 			host := hosts.Host{RKEConfigNode: v3.RKEConfigNode{Address: node.Annotations[k8s.ExternalAddressAnnotation]}}
+			logrus.Infof("Host %v is labeled to ignore upgrade", host.Address)
 			c.HostsLabeledToIgnoreUpgrade[host.Address] = true
 		}
 	}
@@ -174,7 +175,7 @@ func (c *Cluster) CalculateMaxUnavailable() (int, int, error) {
 	}
 	// maxUnavailable should be calculated against all hosts provided in cluster.yml except the ones labelled to be ignored for upgrade
 	workerHosts += len(inactiveWorkerHosts)
-	maxUnavailableWorker, err := services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableWorker, workerHosts)
+	maxUnavailableWorker, err := services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableWorker, workerHosts, services.WorkerRole)
 	if err != nil {
 		return maxUnavailableWorker, maxUnavailableControl, err
 	}
@@ -185,7 +186,7 @@ func (c *Cluster) CalculateMaxUnavailable() (int, int, error) {
 		controlHosts++
 	}
 	controlHosts += len(inactiveControlPlaneHosts)
-	maxUnavailableControl, err = services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableControlplane, controlHosts)
+	maxUnavailableControl, err = services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableControlplane, controlHosts, services.ControlRole)
 	if err != nil {
 		return maxUnavailableWorker, maxUnavailableControl, err
 	}
diff --git a/k8s/node.go b/k8s/node.go
index 2b1b5a5e..186c68d4 100644
--- a/k8s/node.go
+++ b/k8s/node.go
@@ -17,7 +17,8 @@ const (
 	HostnameLabel                = "kubernetes.io/hostname"
 	InternalAddressAnnotation    = "rke.cattle.io/internal-ip"
 	ExternalAddressAnnotation    = "rke.cattle.io/external-ip"
-	IgnoreHostDuringUpgradeLabel = "rke.cattle.io/ignore-during-upgrade"
+	IgnoreHostDuringUpgradeLabel = "user.cattle.io/upgrade-policy"
+	IgnoreLabelValue             = "prevent"
 	AWSCloudProvider             = "aws"
 	MaxRetries                   = 5
 	RetryInterval                = 5
diff --git a/services/controlplane.go b/services/controlplane.go
index 62adec85..a8b791de 100644
--- a/services/controlplane.go
+++ b/services/controlplane.go
@@ -80,10 +80,8 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
 		upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
 	if err != nil {
 		logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
-		if len(hostsFailedToUpgrade) >= maxUnavailable {
-			return errMsgMaxUnavailableNotFailed, err
-		}
 		errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
+		return errMsgMaxUnavailableNotFailed, err
 	}
 	log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
 	return errMsgMaxUnavailableNotFailed, nil
diff --git a/services/node_util.go b/services/node_util.go
index d468c4a2..6a373050 100644
--- a/services/node_util.go
+++ b/services/node_util.go
@@ -3,6 +3,7 @@ package services
 import (
 	"bytes"
 	"fmt"
+	"strings"
 	"sync"
 	"time"
 
@@ -79,7 +80,7 @@ func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.M
 		if inactiveHosts[node.Labels[k8s.HostnameLabel]] {
 			continue
 		}
-		if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == "true" {
+		if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == k8s.IgnoreLabelValue {
 			continue
 		}
 		nodeList = append(nodeList, node)
@@ -87,10 +88,19 @@
 	return nodeList, nil
 }
 
-func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error) {
+func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int, role string) (int, error) {
 	// if maxUnavailable is given in percent, round down
 	maxUnavailableParsed := k8sutil.Parse(maxUnavailableVal)
 	logrus.Debugf("Provided value for maxUnavailable: %v", maxUnavailableParsed)
+	if maxUnavailableParsed.Type == k8sutil.Int {
+		if maxUnavailableParsed.IntVal <= 0 {
+			return 0, fmt.Errorf("invalid input for max_unavailable_%s: %v, value must be > 0", role, maxUnavailableParsed.IntVal)
+		}
+	} else {
+		if strings.HasPrefix(maxUnavailableParsed.StrVal, "-") || maxUnavailableParsed.StrVal == "0%" {
+			return 0, fmt.Errorf("invalid input for max_unavailable_%s: %v, value must be > 0", role, maxUnavailableParsed.StrVal)
+		}
+	}
 	maxUnavailable, err := k8sutil.GetValueFromIntOrPercent(&maxUnavailableParsed, numHosts, false)
 	if err != nil {
 		logrus.Errorf("Unable to parse max_unavailable, should be a number or percentage of nodes, error: %v", err)
@@ -98,6 +108,7 @@
 	}
 	if maxUnavailable == 0 {
 		// In case there is only one node and rounding down maxUnvailable percentage led to 0
+		logrus.Infof("max_unavailable_%s got rounded down to 0, resetting to 1", role)
 		maxUnavailable = 1
 	}
 	logrus.Debugf("Parsed value of maxUnavailable: %v", maxUnavailable)
@@ -114,11 +125,11 @@
 	}
 
 	if lenInactiveHosts > 0 {
-		if maxUnavailable == lenInactiveHosts {
-			return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) are found to be inactive prior to upgrade", component, lenInactiveHosts)
+		if lenInactiveHosts >= maxUnavailable {
+			return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) cannot be reached prior to upgrade", component, lenInactiveHosts)
 		}
 		maxUnavailable -= lenInactiveHosts
-		logrus.Infof("Resetting %s to %v since %v host(s) are found to be inactive prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
+		logrus.Infof("Resetting %s to %v since %v host(s) cannot be reached prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
 	}
 	return maxUnavailable, nil
 }