1
0
mirror of https://github.com/rancher/rke.git synced 2025-09-12 21:24:00 +00:00

Change RKE upgrade logic for zero downtime

This commit is contained in:
rajashree
2020-02-04 11:27:52 -08:00
parent 2fc40203ed
commit 11678a3f98
10 changed files with 628 additions and 27 deletions

View File

@@ -18,6 +18,8 @@ const (
InternalAddressAnnotation = "rke.cattle.io/internal-ip"
ExternalAddressAnnotation = "rke.cattle.io/external-ip"
AWSCloudProvider = "aws"
MaxRetries = 5
RetryInterval = 5
)
func DeleteNode(k8sClient *kubernetes.Clientset, nodeName, cloudProvider string) error {
@@ -37,26 +39,35 @@ func GetNodeList(k8sClient *kubernetes.Clientset) (*v1.NodeList, error) {
}
func GetNode(k8sClient *kubernetes.Clientset, nodeName string) (*v1.Node, error) {
nodes, err := GetNodeList(k8sClient)
if err != nil {
return nil, err
}
for _, node := range nodes.Items {
if strings.ToLower(node.Labels[HostnameLabel]) == strings.ToLower(nodeName) {
return &node, nil
var listErr error
for retries := 0; retries < MaxRetries; retries++ {
nodes, err := GetNodeList(k8sClient)
if err != nil {
listErr = err
time.Sleep(time.Second * RetryInterval)
continue
}
for _, node := range nodes.Items {
if strings.ToLower(node.Labels[HostnameLabel]) == strings.ToLower(nodeName) {
return &node, nil
}
}
time.Sleep(time.Second * RetryInterval)
}
if listErr != nil {
return nil, listErr
}
return nil, apierrors.NewNotFound(schema.GroupResource{}, nodeName)
}
func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, cordoned bool) error {
updated := false
for retries := 0; retries <= 5; retries++ {
for retries := 0; retries < MaxRetries; retries++ {
node, err := GetNode(k8sClient, nodeName)
if err != nil {
logrus.Debugf("Error getting node %s: %v", nodeName, err)
time.Sleep(time.Second * 5)
continue
// no need to retry here since GetNode already retries
return err
}
if node.Spec.Unschedulable == cordoned {
logrus.Debugf("Node %s is already cordoned: %v", nodeName, cordoned)
@@ -66,7 +77,7 @@ func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, cordoned b
_, err = k8sClient.CoreV1().Nodes().Update(node)
if err != nil {
logrus.Debugf("Error setting cordoned state for node %s: %v", nodeName, err)
time.Sleep(time.Second * 5)
time.Sleep(time.Second * RetryInterval)
continue
}
updated = true
@@ -80,7 +91,7 @@ func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, cordoned b
func IsNodeReady(node v1.Node) bool {
nodeConditions := node.Status.Conditions
for _, condition := range nodeConditions {
if condition.Type == "Ready" && condition.Status == v1.ConditionTrue {
if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
return true
}
}