
Merge pull request #3443 from kinarashah/hstname

Sergey Nasovich 2023-11-28 12:54:46 -05:00 committed by GitHub
commit 359128accd
8 changed files with 33 additions and 22 deletions


@@ -492,7 +492,7 @@ func (c *Cluster) doAddonDeploy(ctx context.Context, addonYaml, resourceName str
if err != nil {
return &addonError{fmt.Sprintf("%v", err), isCritical}
}
-node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.CloudProvider.Name)
+node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.ControlPlaneHosts[0].InternalAddress, c.CloudProvider.Name)
if err != nil {
return &addonError{fmt.Sprintf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err), isCritical}
}
@@ -513,7 +513,7 @@ func (c *Cluster) doAddonDelete(ctx context.Context, resourceName string, isCrit
if err != nil {
return &addonError{fmt.Sprintf("%v", err), isCritical}
}
-node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.CloudProvider.Name)
+node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.ControlPlaneHosts[0].InternalAddress, c.CloudProvider.Name)
if err != nil {
return &addonError{fmt.Sprintf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err), isCritical}
}
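Taken together, the change widens the node-lookup helpers: every caller that previously identified a node only by its HostnameOverride now also passes the host's InternalAddress, so the k8s package can still locate the node when a cloud provider has registered it under a different name. A minimal usage sketch against the rancher/rke packages at this commit (cordonByHost is a hypothetical wrapper, not part of the codebase):

package example

import (
	"github.com/rancher/rke/hosts"
	"github.com/rancher/rke/k8s"
	"k8s.io/client-go/kubernetes"
)

// cordonByHost illustrates the widened call shape: lookups now carry the
// host's InternalAddress alongside its HostnameOverride.
func cordonByHost(client *kubernetes.Clientset, host *hosts.Host, cloudProviderName string) error {
	if _, err := k8s.GetNode(client, host.HostnameOverride, host.InternalAddress, cloudProviderName); err != nil {
		return err
	}
	return k8s.CordonUncordon(client, host.HostnameOverride, host.InternalAddress, cloudProviderName, true)
}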


@@ -1018,13 +1018,12 @@ func setNodeAnnotationsLabelsTaints(k8sClient *kubernetes.Clientset, host *hosts
node := &v1.Node{}
var err error
for retries := 0; retries <= 5; retries++ {
-node, err = k8s.GetNode(k8sClient, host.HostnameOverride, cloudProviderName)
+node, err = k8s.GetNode(k8sClient, host.HostnameOverride, host.InternalAddress, cloudProviderName)
if err != nil {
logrus.Debugf("[hosts] Can't find node by name [%s], error: %v", host.HostnameOverride, err)
time.Sleep(2 * time.Second)
continue
}
oldNode := node.DeepCopy()
k8s.SetNodeAddressesAnnotations(node, host.InternalAddress, host.Address)
k8s.SyncNodeLabels(node, host.ToAddLabels, host.ToDelLabels)


@@ -11,6 +11,7 @@ import (
v3 "github.com/rancher/rke/types"
"github.com/rancher/rke/util"
"golang.org/x/sync/errgroup"
+v1 "k8s.io/api/core/v1"
)
func (c *Cluster) ClusterRemove(ctx context.Context) error {
@@ -92,7 +93,13 @@ func (c *Cluster) RemoveOldNodes(ctx context.Context) error {
host := &hosts.Host{}
host.HostnameOverride = node.Name
if !hosts.IsNodeInList(host, uniqueHosts) {
-if err := k8s.DeleteNode(kubeClient, node.Name, c.CloudProvider.Name); err != nil {
+nodeAddress := ""
+for _, addr := range node.Status.Addresses {
+if addr.Type == v1.NodeInternalIP {
+nodeAddress = addr.Address
+}
+}
+if err := k8s.DeleteNode(kubeClient, node.Name, nodeAddress, c.CloudProvider.Name); err != nil {
log.Warnf(ctx, "Failed to delete old node [%s] from kubernetes")
}
}
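Unlike the other call sites, RemoveOldNodes has no hosts.Host carrying an InternalAddress for the nodes being pruned, so it derives the address from the Node object's own status before calling the widened DeleteNode. The same extraction, pulled into a helper for illustration (internalIPOf is a hypothetical name, not RKE code):

package example

import v1 "k8s.io/api/core/v1"

// internalIPOf returns the node's reported InternalIP, or "" if none is
// present; when several are listed, the last one wins, as in the loop above.
func internalIPOf(node *v1.Node) string {
	address := ""
	for _, addr := range node.Status.Addresses {
		if addr.Type == v1.NodeInternalIP {
			address = addr.Address
		}
	}
	return address
}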


@@ -197,7 +197,7 @@ func DeleteNode(ctx context.Context, toDeleteHost *Host, kubeClient *kubernetes.
return nil
}
log.Infof(ctx, "[hosts] Cordoning host [%s]", toDeleteHost.Address)
-if _, err := k8s.GetNode(kubeClient, toDeleteHost.HostnameOverride, cloudProviderName); err != nil {
+if _, err := k8s.GetNode(kubeClient, toDeleteHost.HostnameOverride, toDeleteHost.InternalAddress, cloudProviderName); err != nil {
if apierrors.IsNotFound(err) {
log.Warnf(ctx, "[hosts] Can't find node by name [%s]", toDeleteHost.Address)
return nil
@@ -205,11 +205,11 @@ func DeleteNode(ctx context.Context, toDeleteHost *Host, kubeClient *kubernetes.
return err
}
-if err := k8s.CordonUncordon(kubeClient, toDeleteHost.HostnameOverride, cloudProviderName, true); err != nil {
+if err := k8s.CordonUncordon(kubeClient, toDeleteHost.HostnameOverride, toDeleteHost.InternalAddress, cloudProviderName, true); err != nil {
return err
}
log.Infof(ctx, "[hosts] Deleting host [%s] from the cluster", toDeleteHost.Address)
-if err := k8s.DeleteNode(kubeClient, toDeleteHost.HostnameOverride, cloudProviderName); err != nil {
+if err := k8s.DeleteNode(kubeClient, toDeleteHost.HostnameOverride, toDeleteHost.InternalAddress, cloudProviderName); err != nil {
return err
}
log.Infof(ctx, "[hosts] Successfully deleted host [%s] from the cluster", toDeleteHost.Address)


@@ -24,10 +24,10 @@ const (
RetryInterval = 5
)
-func DeleteNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string) error {
+func DeleteNode(k8sClient *kubernetes.Clientset, nodeName string, nodeAddress string, cloudProviderName string) error {
// If cloud provider is configured, the node name can be set by the cloud provider, which can be different from the original node name
if cloudProviderName != "" {
-node, err := GetNode(k8sClient, nodeName, cloudProviderName)
+node, err := GetNode(k8sClient, nodeName, nodeAddress, cloudProviderName)
if err != nil {
return err
}
@@ -40,7 +40,7 @@ func GetNodeList(k8sClient *kubernetes.Clientset) (*v1.NodeList, error) {
return k8sClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
}
-func GetNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string) (*v1.Node, error) {
+func GetNode(k8sClient *kubernetes.Clientset, nodeName, nodeAddress, cloudProviderName string) (*v1.Node, error) {
var listErr error
for retries := 0; retries < MaxRetries; retries++ {
logrus.Debugf("Checking node list for node [%v], try #%v", nodeName, retries+1)
@@ -57,9 +57,13 @@ func GetNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string
return &node, nil
}
if cloudProviderName == ExternalAWSCloudProviderName {
-logrus.Debugf("Checking hostname address for node [%v], cloud provider: %v", nodeName, cloudProviderName)
+if nodeAddress == "" {
+return nil, fmt.Errorf("failed to find node [%v] with empty nodeAddress, cloud provider: %v", nodeName, cloudProviderName)
+}
+logrus.Debugf("Checking internal address for node [%v], cloud provider: %v", nodeName, cloudProviderName)
for _, addr := range node.Status.Addresses {
-if addr.Type == v1.NodeHostName && strings.ToLower(node.Labels[HostnameLabel]) == addr.Address {
+if addr.Type == v1.NodeInternalIP && nodeAddress == addr.Address {
+logrus.Debugf("Found node [%s]: %v", nodeName, nodeAddress)
return &node, nil
}
}
@@ -73,10 +77,10 @@ func GetNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string
return nil, apierrors.NewNotFound(schema.GroupResource{}, nodeName)
}
-func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, cloudProviderName string, cordoned bool) error {
+func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, nodeAddress string, cloudProviderName string, cordoned bool) error {
updated := false
for retries := 0; retries < MaxRetries; retries++ {
node, err := GetNode(k8sClient, nodeName, cloudProviderName)
node, err := GetNode(k8sClient, nodeName, nodeAddress, cloudProviderName)
if err != nil {
logrus.Debugf("Error getting node %s: %v", nodeName, err)
// no need to retry here since GetNode already retries
@@ -127,6 +131,7 @@ func SyncNodeLabels(node *v1.Node, toAddLabels, toDelLabels map[string]string) {
delete(node.Labels, key)
}
}
// ADD Labels
for key, value := range toAddLabels {
node.Labels[key] = value
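The heart of the change is the lookup fallback in GetNode: with the external AWS cloud provider, the registered node name (and its hostname label) may not match RKE's HostnameOverride, so the search now compares the caller-supplied internal address against the node's InternalIP addresses instead of matching on the hostname label, and errors out early when no address is available. A condensed sketch of that selection logic over an already-fetched node list (findNode is a hypothetical helper, not the RKE function):

package example

import (
	"fmt"
	"strings"

	v1 "k8s.io/api/core/v1"
)

// findNode prefers a case-insensitive name match and, when an internal
// address is supplied, falls back to matching it against the node's
// InternalIP addresses.
func findNode(nodes *v1.NodeList, nodeName, nodeAddress string) (*v1.Node, error) {
	for i := range nodes.Items {
		node := &nodes.Items[i]
		if strings.EqualFold(node.Name, nodeName) {
			return node, nil
		}
		if nodeAddress == "" {
			continue // nothing to fall back on for this host
		}
		for _, addr := range node.Status.Addresses {
			if addr.Type == v1.NodeInternalIP && addr.Address == nodeAddress {
				return node, nil
			}
		}
	}
	return nil, fmt.Errorf("node %q (internal address %q) not found", nodeName, nodeAddress)
}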


@@ -165,7 +165,7 @@ func processControlPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.C
}
if !controlPlaneUpgradable && !workerPlaneUpgradable {
log.Infof(ctx, "Upgrade not required for controlplane and worker components of host %v", runHost.HostnameOverride)
-if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, cloudProviderName, false); err != nil {
+if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName, false); err != nil {
// This node didn't undergo an upgrade, so RKE will only log any error after uncordoning it and won't count this in maxUnavailable
logrus.Errorf("[controlplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
}
@@ -237,7 +237,7 @@ func upgradeControlHost(ctx context.Context, kubeClient *kubernetes.Clientset, h
if err := CheckNodeReady(kubeClient, host, ControlRole, cloudProviderName); err != nil {
return err
}
-return k8s.CordonUncordon(kubeClient, host.HostnameOverride, cloudProviderName, false)
+return k8s.CordonUncordon(kubeClient, host.HostnameOverride, host.InternalAddress, cloudProviderName, false)
}
func RemoveControlPlane(ctx context.Context, controlHosts []*hosts.Host, force bool) error {


@@ -20,7 +20,7 @@ import (
func CheckNodeReady(kubeClient *kubernetes.Clientset, runHost *hosts.Host, component, cloudProviderName string) error {
for retries := 0; retries < k8s.MaxRetries; retries++ {
logrus.Infof("[%s] Now checking status of node %v, try #%v", component, runHost.HostnameOverride, retries+1)
-k8sNode, err := k8s.GetNode(kubeClient, runHost.HostnameOverride, cloudProviderName)
+k8sNode, err := k8s.GetNode(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName)
if err != nil {
return fmt.Errorf("[%s] Error getting node %v: %v", component, runHost.HostnameOverride, err)
}
@@ -35,7 +35,7 @@ func CheckNodeReady(kubeClient *kubernetes.Clientset, runHost *hosts.Host, compo
func cordonAndDrainNode(kubeClient *kubernetes.Clientset, host *hosts.Host, drainNode bool, drainHelper drain.Helper, component, cloudProviderName string) error {
logrus.Debugf("[%s] Cordoning node %v", component, host.HostnameOverride)
-if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, cloudProviderName, true); err != nil {
+if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, host.InternalAddress, cloudProviderName, true); err != nil {
return err
}
if !drainNode {
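CheckNodeReady fetches the node (now by name plus internal address) on each retry and then inspects its status before the upgrade proceeds. Assuming the readiness test keys on the standard NodeReady condition, a minimal sketch of that check (isNodeReady is a hypothetical helper, not the RKE implementation):

package example

import v1 "k8s.io/api/core/v1"

// isNodeReady reports whether the node's NodeReady condition is True.
func isNodeReady(node *v1.Node) bool {
	for _, cond := range node.Status.Conditions {
		if cond.Type == v1.NodeReady {
			return cond.Status == v1.ConditionTrue
		}
	}
	return false
}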


@@ -87,7 +87,7 @@ func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *ku
func updateNewHostsList(kubeClient *kubernetes.Clientset, allHosts []*hosts.Host, newHosts map[string]bool, cloudProviderName string) {
for _, h := range allHosts {
-_, err := k8s.GetNode(kubeClient, h.HostnameOverride, cloudProviderName)
+_, err := k8s.GetNode(kubeClient, h.HostnameOverride, h.InternalAddress, cloudProviderName)
if err != nil && apierrors.IsNotFound(err) {
// this host could have been added to cluster state upon successful controlplane upgrade but isn't a node yet.
newHosts[h.HostnameOverride] = true
@@ -167,7 +167,7 @@ func processWorkerPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.Cl
}
if !upgradable {
logrus.Infof("[workerplane] Upgrade not required for worker components of host %v", runHost.HostnameOverride)
-if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, cloudProviderName, false); err != nil {
+if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName, false); err != nil {
// This node didn't undergo an upgrade, so RKE will only log any error after uncordoning it and won't count this in maxUnavailable
logrus.Errorf("[workerplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
}
@@ -211,7 +211,7 @@ func upgradeWorkerHost(ctx context.Context, kubeClient *kubernetes.Clientset, ru
return err
}
// uncordon node
-return k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, cloudProviderName, false)
+return k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName, false)
}
func doDeployWorkerPlaneHost(ctx context.Context, host *hosts.Host, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, processMap map[string]v3.Process, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage, k8sVersion string) error {