
Attempt upgrade on NotReady hosts

Author: rajashree
Date:   2020-02-26 13:33:22 -08:00
Parent: ca3a3b1814
Commit: e27a05f8b1

10 changed files with 313 additions and 177 deletions


@@ -11,13 +11,14 @@ import (
 	v3 "github.com/rancher/types/apis/management.cattle.io/v3"
 	"github.com/sirupsen/logrus"
 	"k8s.io/api/core/v1"
+	k8sutil "k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/kubectl/pkg/drain"
 )
 
-func checkNodeReady(kubeClient *kubernetes.Clientset, runHost *hosts.Host, component string) error {
+func CheckNodeReady(kubeClient *kubernetes.Clientset, runHost *hosts.Host, component string) error {
 	for retries := 0; retries < k8s.MaxRetries; retries++ {
-		logrus.Debugf("[%s] Now checking status of node %v", component, runHost.HostnameOverride)
+		logrus.Infof("[%s] Now checking status of node %v", component, runHost.HostnameOverride)
 		k8sNode, err := k8s.GetNode(kubeClient, runHost.HostnameOverride)
 		if err != nil {
 			return fmt.Errorf("[%s] Error getting node %v: %v", component, runHost.HostnameOverride, err)
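
The rest of the retry loop is not shown in this hunk, but a readiness check like this typically inspects the NodeeReady condition on the fetched node object. Below is a minimal client-go sketch of that check; isNodeReady is an illustrative helper and not necessarily what CheckNodeReady does internally.

package nodeutil

import (
	v1 "k8s.io/api/core/v1"
)

// isNodeReady reports whether the node's Ready condition is True.
// Sketch only: CheckNodeReady's actual loop body is not shown in the hunk above.
func isNodeReady(node *v1.Node) bool {
	for _, cond := range node.Status.Conditions {
		if cond.Type == v1.NodeReady {
			return cond.Status == v1.ConditionTrue
		}
	}
	return false
}
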
@@ -60,12 +61,13 @@ func getDrainHelper(kubeClient *kubernetes.Clientset, upgradeStrategy v3.NodeUpg
 	return drainHelper
 }
 
-func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.Map, newHosts, inactiveHosts map[string]bool) ([]v1.Node, error) {
+func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.Map, newHosts, inactiveHosts map[string]bool, component string) ([]v1.Node, error) {
 	var nodeList []v1.Node
 	nodes, err := k8s.GetNodeList(kubeClient)
 	if err != nil {
 		return nodeList, err
 	}
+	logrus.Infof("[%s] Getting list of nodes for upgrade", component)
 	for _, node := range nodes.Items {
 		if _, ok := hostsFailed.Load(node.Labels[k8s.HostnameLabel]); ok {
 			continue
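
Two details in this filter are easy to miss: hostsFailed is a *sync.Map, which suggests parallel upgrade workers record failures into it concurrently, and each Kubernetes node is matched back to its RKE host through the hostname label. A small sketch of that pattern follows; markFailed, shouldSkip, and the literal label value are illustrative assumptions, not RKE identifiers.

package nodeutil

import (
	"sync"

	v1 "k8s.io/api/core/v1"
)

// hostnameLabel is an assumption standing in for RKE's k8s.HostnameLabel constant.
const hostnameLabel = "kubernetes.io/hostname"

// markFailed is the kind of call a parallel upgrade worker would make when a
// host fails; sync.Map is safe for concurrent writers without extra locking.
func markFailed(hostsFailed *sync.Map, hostname string) {
	hostsFailed.Store(hostname, true)
}

// shouldSkip mirrors the check in getNodeListForUpgrade above: skip a node if
// its hostname label was recorded as failed.
func shouldSkip(hostsFailed *sync.Map, node v1.Node) bool {
	_, failed := hostsFailed.Load(node.Labels[hostnameLabel])
	return failed
}
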
@@ -84,3 +86,36 @@ func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.M
 		}
 	}
 	return nodeList, nil
 }
+
+func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error) {
+	// if maxUnavailable is given in percent, round down
+	maxUnavailableParsed := k8sutil.Parse(maxUnavailableVal)
+	logrus.Debugf("Provided value for maxUnavailable: %v", maxUnavailableParsed)
+	maxUnavailable, err := k8sutil.GetValueFromIntOrPercent(&maxUnavailableParsed, numHosts, false)
+	if err != nil {
+		logrus.Errorf("Unable to parse max_unavailable, should be a number or percentage of nodes, error: %v", err)
+		return 0, err
+	}
+	if maxUnavailable == 0 {
+		// In case there is only one node and rounding down the maxUnavailable percentage led to 0
+		maxUnavailable = 1
+	}
+	logrus.Debugf("Parsed value of maxUnavailable: %v", maxUnavailable)
+	return maxUnavailable, nil
+}
+
+func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int) int {
+	if maxUnavailable > WorkerThreads {
+		/* Upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors
+		regarding too many open sockets. Because of this, RKE switched to using worker pools; 50 worker threads have been
+		sufficient to optimize rke up, upgrading at most 50 nodes in parallel. So the user-configurable maxUnavailable is
+		respected only up to 50 and is capped at 50. */
+		maxUnavailable = WorkerThreads
+		logrus.Info("Resetting maxUnavailable to 50, to avoid issues related to upgrading large number of nodes in parallel")
+	}
+	if lenInactiveHosts > 0 {
+		maxUnavailable -= lenInactiveHosts
+		logrus.Infof("Resetting maxUnavailable to %v since %v host(s) are found to be inactive/unavailable prior to upgrade", maxUnavailable, lenInactiveHosts)
+	}
+	return maxUnavailable
+}
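
Taken together, the two new helpers turn the user-supplied max_unavailable (an absolute count or a percentage) into the number of nodes upgraded in parallel: the value is resolved against the host count with apimachinery's intstr helpers and rounded down, floored at 1, capped at the worker-pool size, and finally reduced by hosts that were already inactive before the upgrade. Below is a minimal sketch of that arithmetic, assuming a worker-pool size of 50 as the log message above states; calculateThenCap is an illustrative wrapper, not an RKE function.

package main

import (
	"fmt"

	k8sutil "k8s.io/apimachinery/pkg/util/intstr"
)

// Assumption: mirrors the WorkerThreads cap of 50 mentioned in the log message above.
const workerThreads = 50

// calculateThenCap reproduces the arithmetic of CalculateMaxUnavailable and
// resetMaxUnavailable for illustration: round a percentage down, floor at 1,
// cap at the worker-pool size, then subtract hosts that are already inactive.
func calculateThenCap(maxUnavailableVal string, numHosts, inactiveHosts int) (int, error) {
	parsed := k8sutil.Parse(maxUnavailableVal)
	maxUnavailable, err := k8sutil.GetValueFromIntOrPercent(&parsed, numHosts, false)
	if err != nil {
		return 0, err
	}
	if maxUnavailable == 0 {
		maxUnavailable = 1
	}
	if maxUnavailable > workerThreads {
		maxUnavailable = workerThreads
	}
	if inactiveHosts > 0 {
		maxUnavailable -= inactiveHosts
	}
	return maxUnavailable, nil
}

func main() {
	// "10%" of 25 hosts rounds down to 2; "10%" of 3 hosts rounds down to 0 and is floored to 1;
	// an absolute value of 80 on 200 hosts is capped to 50, and 3 inactive hosts leave 47.
	fmt.Println(calculateThenCap("10%", 25, 0)) // 2 <nil>
	fmt.Println(calculateThenCap("10%", 3, 0))  // 1 <nil>
	fmt.Println(calculateThenCap("80", 200, 3)) // 47 <nil>
}
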