mirror of
https://github.com/rancher/rke.git
synced 2025-08-19 07:17:30 +00:00
Merge pull request #1943 from mrajashree/rename_label
Rename ignore label and return error on controlplane failure
This commit is contained in:
commit
b9b29be0e5
@ -89,8 +89,9 @@ func (c *Cluster) FindHostsLabeledToIgnoreUpgrade(ctx context.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, node := range nodes.Items {
|
for _, node := range nodes.Items {
|
||||||
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == "true" {
|
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == k8s.IgnoreLabelValue {
|
||||||
host := hosts.Host{RKEConfigNode: v3.RKEConfigNode{Address: node.Annotations[k8s.ExternalAddressAnnotation]}}
|
host := hosts.Host{RKEConfigNode: v3.RKEConfigNode{Address: node.Annotations[k8s.ExternalAddressAnnotation]}}
|
||||||
|
logrus.Infof("Host %v is labeled to ignore upgrade", host.Address)
|
||||||
c.HostsLabeledToIgnoreUpgrade[host.Address] = true
|
c.HostsLabeledToIgnoreUpgrade[host.Address] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -174,7 +175,7 @@ func (c *Cluster) CalculateMaxUnavailable() (int, int, error) {
|
|||||||
}
|
}
|
||||||
// maxUnavailable should be calculated against all hosts provided in cluster.yml except the ones labelled to be ignored for upgrade
|
// maxUnavailable should be calculated against all hosts provided in cluster.yml except the ones labelled to be ignored for upgrade
|
||||||
workerHosts += len(inactiveWorkerHosts)
|
workerHosts += len(inactiveWorkerHosts)
|
||||||
maxUnavailableWorker, err := services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableWorker, workerHosts)
|
maxUnavailableWorker, err := services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableWorker, workerHosts, services.WorkerRole)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return maxUnavailableWorker, maxUnavailableControl, err
|
return maxUnavailableWorker, maxUnavailableControl, err
|
||||||
}
|
}
|
||||||
@ -185,7 +186,7 @@ func (c *Cluster) CalculateMaxUnavailable() (int, int, error) {
|
|||||||
controlHosts++
|
controlHosts++
|
||||||
}
|
}
|
||||||
controlHosts += len(inactiveControlPlaneHosts)
|
controlHosts += len(inactiveControlPlaneHosts)
|
||||||
maxUnavailableControl, err = services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableControlplane, controlHosts)
|
maxUnavailableControl, err = services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableControlplane, controlHosts, services.ControlRole)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return maxUnavailableWorker, maxUnavailableControl, err
|
return maxUnavailableWorker, maxUnavailableControl, err
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,8 @@ const (
|
|||||||
HostnameLabel = "kubernetes.io/hostname"
|
HostnameLabel = "kubernetes.io/hostname"
|
||||||
InternalAddressAnnotation = "rke.cattle.io/internal-ip"
|
InternalAddressAnnotation = "rke.cattle.io/internal-ip"
|
||||||
ExternalAddressAnnotation = "rke.cattle.io/external-ip"
|
ExternalAddressAnnotation = "rke.cattle.io/external-ip"
|
||||||
IgnoreHostDuringUpgradeLabel = "rke.cattle.io/ignore-during-upgrade"
|
IgnoreHostDuringUpgradeLabel = "user.cattle.io/upgrade-policy"
|
||||||
|
IgnoreLabelValue = "prevent"
|
||||||
AWSCloudProvider = "aws"
|
AWSCloudProvider = "aws"
|
||||||
MaxRetries = 5
|
MaxRetries = 5
|
||||||
RetryInterval = 5
|
RetryInterval = 5
|
||||||
|
@ -80,10 +80,8 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
|
|||||||
upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
|
upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
||||||
if len(hostsFailedToUpgrade) >= maxUnavailable {
|
|
||||||
return errMsgMaxUnavailableNotFailed, err
|
|
||||||
}
|
|
||||||
errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
|
||||||
|
return errMsgMaxUnavailableNotFailed, err
|
||||||
}
|
}
|
||||||
log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
|
log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
|
||||||
return errMsgMaxUnavailableNotFailed, nil
|
return errMsgMaxUnavailableNotFailed, nil
|
||||||
|
@ -3,6 +3,7 @@ package services
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -79,7 +80,7 @@ func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.M
|
|||||||
if inactiveHosts[node.Labels[k8s.HostnameLabel]] {
|
if inactiveHosts[node.Labels[k8s.HostnameLabel]] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == "true" {
|
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == k8s.IgnoreLabelValue {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
nodeList = append(nodeList, node)
|
nodeList = append(nodeList, node)
|
||||||
@ -87,10 +88,19 @@ func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.M
|
|||||||
return nodeList, nil
|
return nodeList, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error) {
|
func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int, role string) (int, error) {
|
||||||
// if maxUnavailable is given in percent, round down
|
// if maxUnavailable is given in percent, round down
|
||||||
maxUnavailableParsed := k8sutil.Parse(maxUnavailableVal)
|
maxUnavailableParsed := k8sutil.Parse(maxUnavailableVal)
|
||||||
logrus.Debugf("Provided value for maxUnavailable: %v", maxUnavailableParsed)
|
logrus.Debugf("Provided value for maxUnavailable: %v", maxUnavailableParsed)
|
||||||
|
if maxUnavailableParsed.Type == k8sutil.Int {
|
||||||
|
if maxUnavailableParsed.IntVal <= 0 {
|
||||||
|
return 0, fmt.Errorf("invalid input for max_unavailable_%s: %v, value must be > 0", role, maxUnavailableParsed.IntVal)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if strings.HasPrefix(maxUnavailableParsed.StrVal, "-") || maxUnavailableParsed.StrVal == "0%" {
|
||||||
|
return 0, fmt.Errorf("invalid input for max_unavailable_%s: %v, value must be > 0", role, maxUnavailableParsed.StrVal)
|
||||||
|
}
|
||||||
|
}
|
||||||
maxUnavailable, err := k8sutil.GetValueFromIntOrPercent(&maxUnavailableParsed, numHosts, false)
|
maxUnavailable, err := k8sutil.GetValueFromIntOrPercent(&maxUnavailableParsed, numHosts, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Errorf("Unable to parse max_unavailable, should be a number or percentage of nodes, error: %v", err)
|
logrus.Errorf("Unable to parse max_unavailable, should be a number or percentage of nodes, error: %v", err)
|
||||||
@ -98,6 +108,7 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error
|
|||||||
}
|
}
|
||||||
if maxUnavailable == 0 {
|
if maxUnavailable == 0 {
|
||||||
// In case there is only one node and rounding down maxUnavailable percentage led to 0
|
// In case there is only one node and rounding down maxUnavailable percentage led to 0
|
||||||
|
logrus.Infof("max_unavailable_%s got rounded down to 0, resetting to 1", role)
|
||||||
maxUnavailable = 1
|
maxUnavailable = 1
|
||||||
}
|
}
|
||||||
logrus.Debugf("Parsed value of maxUnavailable: %v", maxUnavailable)
|
logrus.Debugf("Parsed value of maxUnavailable: %v", maxUnavailable)
|
||||||
@ -114,11 +125,11 @@ func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if lenInactiveHosts > 0 {
|
if lenInactiveHosts > 0 {
|
||||||
if maxUnavailable == lenInactiveHosts {
|
if lenInactiveHosts >= maxUnavailable {
|
||||||
return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) are found to be inactive prior to upgrade", component, lenInactiveHosts)
|
return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) cannot be reached prior to upgrade", component, lenInactiveHosts)
|
||||||
}
|
}
|
||||||
maxUnavailable -= lenInactiveHosts
|
maxUnavailable -= lenInactiveHosts
|
||||||
logrus.Infof("Resetting %s to %v since %v host(s) are found to be inactive prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
|
logrus.Infof("Resetting %s to %v since %v host(s) cannot be reached prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
|
||||||
}
|
}
|
||||||
return maxUnavailable, nil
|
return maxUnavailable, nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user