diff --git a/cluster/cluster.go b/cluster/cluster.go index 0d41c0aa..8269f6a8 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -29,6 +29,7 @@ type Cluster struct { EtcdHosts []*hosts.Host WorkerHosts []*hosts.Host ControlPlaneHosts []*hosts.Host + InactiveHosts []*hosts.Host KubeClient *kubernetes.Clientset KubernetesServiceIP net.IP Certificates map[string]pki.CertificatePKI diff --git a/cluster/hosts.go b/cluster/hosts.go index 500ec2b1..6375e0a3 100644 --- a/cluster/hosts.go +++ b/cluster/hosts.go @@ -9,6 +9,7 @@ import ( "github.com/rancher/rke/log" "github.com/rancher/rke/pki" "github.com/rancher/rke/services" + "github.com/rancher/types/apis/management.cattle.io/v3" "github.com/sirupsen/logrus" "golang.org/x/sync/errgroup" ) @@ -26,23 +27,23 @@ func (c *Cluster) TunnelHosts(ctx context.Context, local bool) error { } return nil } - for i := range c.EtcdHosts { - if err := c.EtcdHosts[i].TunnelUp(ctx, c.DockerDialerFactory); err != nil { - return fmt.Errorf("Failed to set up SSH tunneling for Etcd host [%s]: %v", c.EtcdHosts[i].Address, err) + c.InactiveHosts = make([]*hosts.Host, 0) + uniqueHosts := hosts.GetUniqueHostList(c.EtcdHosts, c.ControlPlaneHosts, c.WorkerHosts) + for i := range uniqueHosts { + if err := uniqueHosts[i].TunnelUp(ctx, c.DockerDialerFactory); err != nil { + log.Warnf(ctx, "Failed to set up SSH tunneling for host [%s]: %v", uniqueHosts[i].Address, err) + c.InactiveHosts = append(c.InactiveHosts, uniqueHosts[i]) } } - for i := range c.ControlPlaneHosts { - err := c.ControlPlaneHosts[i].TunnelUp(ctx, c.DockerDialerFactory) - if err != nil { - return fmt.Errorf("Failed to set up SSH tunneling for Control host [%s]: %v", c.ControlPlaneHosts[i].Address, err) - } + for _, host := range c.InactiveHosts { + log.Warnf(ctx, "Removing host [%s] from node lists", host.Address) + c.EtcdHosts = removeFromHosts(host, c.EtcdHosts) + c.ControlPlaneHosts = removeFromHosts(host, c.ControlPlaneHosts) + c.WorkerHosts = removeFromHosts(host, c.WorkerHosts) + c.RancherKubernetesEngineConfig.Nodes = removeFromRKENodes(host.RKEConfigNode, c.RancherKubernetesEngineConfig.Nodes) } - for i := range c.WorkerHosts { - if err := c.WorkerHosts[i].TunnelUp(ctx, c.DockerDialerFactory); err != nil { - return fmt.Errorf("Failed to set up SSH tunneling for Worker host [%s]: %v", c.WorkerHosts[i].Address, err) - } - } - return nil + return ValidateHostCount(c) + } func (c *Cluster) InvertIndexHosts() error { @@ -127,3 +128,21 @@ func CheckEtcdHostsChanged(kubeCluster, currentCluster *Cluster) error { } return nil } + +func removeFromHosts(hostToRemove *hosts.Host, hostList []*hosts.Host) []*hosts.Host { + for i := range hostList { + if hostToRemove.Address == hostList[i].Address { + return append(hostList[:i], hostList[i+1:]...) + } + } + return hostList +} + +func removeFromRKENodes(nodeToRemove v3.RKEConfigNode, nodeList []v3.RKEConfigNode) []v3.RKEConfigNode { + for i := range nodeList { + if nodeToRemove.Address == nodeList[i].Address { + return append(nodeList[:i], nodeList[i+1:]...) + } + } + return nodeList +} diff --git a/cluster/reconcile.go b/cluster/reconcile.go index 7b73df59..e3feac8e 100644 --- a/cluster/reconcile.go +++ b/cluster/reconcile.go @@ -52,7 +52,7 @@ func ReconcileCluster(ctx context.Context, kubeCluster, currentCluster *Cluster) func reconcileWorker(ctx context.Context, currentCluster, kubeCluster *Cluster, kubeClient *kubernetes.Clientset) error { // worker deleted first to avoid issues when worker+controller on same host logrus.Debugf("[reconcile] Check worker hosts to be deleted") - wpToDelete := hosts.GetToDeleteHosts(currentCluster.WorkerHosts, kubeCluster.WorkerHosts) + wpToDelete := hosts.GetToDeleteHosts(currentCluster.WorkerHosts, kubeCluster.WorkerHosts, kubeCluster.InactiveHosts) for _, toDeleteHost := range wpToDelete { toDeleteHost.IsWorker = false if err := hosts.DeleteNode(ctx, toDeleteHost, kubeClient, toDeleteHost.IsControl); err != nil { @@ -80,7 +80,7 @@ func reconcileControl(ctx context.Context, currentCluster, kubeCluster *Cluster, if err != nil { return err } - cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, kubeCluster.ControlPlaneHosts) + cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, kubeCluster.ControlPlaneHosts, kubeCluster.InactiveHosts) // move the current host in local kubeconfig to the end of the list for i, toDeleteHost := range cpToDelete { if toDeleteHost.Address == selfDeleteAddress { @@ -152,7 +152,7 @@ func reconcileEtcd(ctx context.Context, currentCluster, kubeCluster *Cluster, ku clientCert := cert.EncodeCertPEM(currentCluster.Certificates[pki.KubeNodeCertName].Certificate) clientkey := cert.EncodePrivateKeyPEM(currentCluster.Certificates[pki.KubeNodeCertName].Key) - etcdToDelete := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts) + etcdToDelete := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts, kubeCluster.InactiveHosts) for _, etcdHost := range etcdToDelete { if err := services.RemoveEtcdMember(ctx, etcdHost, kubeCluster.EtcdHosts, currentCluster.LocalConnDialerFactory, clientCert, clientkey); err != nil { log.Warnf(ctx, "[reconcile] %v", err) diff --git a/cluster/validation.go b/cluster/validation.go index 153069eb..9d41efd0 100644 --- a/cluster/validation.go +++ b/cluster/validation.go @@ -9,12 +9,8 @@ import ( func (c *Cluster) ValidateCluster() error { // make sure cluster has at least one controlplane/etcd host - - if len(c.EtcdHosts) == 0 && len(c.Services.Etcd.ExternalURLs) == 0 { - return fmt.Errorf("Cluster must have at least one etcd plane host") - } - if len(c.EtcdHosts) > 0 && len(c.Services.Etcd.ExternalURLs) > 0 { - return fmt.Errorf("Cluster can't have both internal and external etcd") + if err := ValidateHostCount(c); err != nil { + return err } // validate hosts options @@ -120,3 +116,13 @@ func validateIngressOptions(c *Cluster) error { } return nil } + +func ValidateHostCount(c *Cluster) error { + if len(c.EtcdHosts) == 0 && len(c.Services.Etcd.ExternalURLs) == 0 { + return fmt.Errorf("Cluster must have at least one etcd plane host") + } + if len(c.EtcdHosts) > 0 && len(c.Services.Etcd.ExternalURLs) > 0 { + return fmt.Errorf("Cluster can't have both internal and external etcd") + } + return nil +} diff --git a/hosts/hosts.go b/hosts/hosts.go index 6f55ab5a..76c23a41 100644 --- a/hosts/hosts.go +++ b/hosts/hosts.go @@ -159,7 +159,7 @@ func RemoveTaintFromHost(ctx context.Context, host *Host, taintKey string, kubeC return nil } -func GetToDeleteHosts(currentHosts, configHosts []*Host) []*Host { +func GetToDeleteHosts(currentHosts, configHosts, inactiveHosts []*Host) []*Host { toDeleteHosts := []*Host{} for _, currentHost := range currentHosts { found := false @@ -169,7 +169,16 @@ func GetToDeleteHosts(currentHosts, configHosts []*Host) []*Host { } } if !found { - toDeleteHosts = append(toDeleteHosts, currentHost) + inactive := false + for _, inactiveHost := range inactiveHosts { + if inactiveHost.Address == currentHost.Address { + inactive = true + break + } + } + if !inactive { + toDeleteHosts = append(toDeleteHosts, currentHost) + } } } return toDeleteHosts