From 5a35f47dc516ab7b036a3c6bdf21c3438af9fd59 Mon Sep 17 00:00:00 2001
From: galal-hussein
Date: Wed, 21 Feb 2018 03:53:32 +0200
Subject: [PATCH] Add remove etcd idempotency

---
 cluster/reconcile.go |  9 ++++++-
 services/etcd.go     | 63 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/cluster/reconcile.go b/cluster/reconcile.go
index 5398afb9..e43963ce 100644
--- a/cluster/reconcile.go
+++ b/cluster/reconcile.go
@@ -193,9 +193,16 @@ func reconcileEtcd(ctx context.Context, currentCluster, kubeCluster *Cluster, ku
 			return err
 		}
 
-		if err := services.AddEtcdMember(ctx, etcdHost, kubeCluster.EtcdHosts, currentCluster.LocalConnDialerFactory, clientCert, clientkey); err != nil {
+		// Check if the host is already part of the cluster -- this covers clusters that lost quorum
+		isEtcdMember, err := services.IsEtcdMember(ctx, etcdHost, kubeCluster.EtcdHosts, currentCluster.LocalConnDialerFactory, clientCert, clientkey)
+		if err != nil {
 			return err
 		}
+		if !isEtcdMember {
+			if err := services.AddEtcdMember(ctx, etcdHost, kubeCluster.EtcdHosts, currentCluster.LocalConnDialerFactory, clientCert, clientkey); err != nil {
+				return err
+			}
+		}
 		etcdHost.ToAddEtcdMember = false
 		readyHosts := getReadyEtcdHosts(kubeCluster.EtcdHosts)
 		etcdProcessHostMap := kubeCluster.getEtcdProcessHostMap(readyHosts)
diff --git a/services/etcd.go b/services/etcd.go
index df833508..45d6db5d 100644
--- a/services/etcd.go
+++ b/services/etcd.go
@@ -2,11 +2,13 @@ package services
 
 import (
 	"fmt"
+	"strings"
 	"time"
 
 	"context"
 
 	etcdclient "github.com/coreos/etcd/client"
+	"github.com/pkg/errors"
 	"github.com/rancher/rke/docker"
 	"github.com/rancher/rke/hosts"
 	"github.com/rancher/rke/log"
@@ -59,11 +61,17 @@ func RemoveEtcdPlane(ctx context.Context, etcdHosts []*hosts.Host, force bool) e
 	return nil
 }
 
-func AddEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*hosts.Host, localConnDialerFactory hosts.DialerFactory, cert, key []byte) error {
-	log.Infof(ctx, "[add/%s] Adding member [etcd-%s] to etcd cluster", ETCDRole, etcdHost.HostnameOverride)
-	peerURL := fmt.Sprintf("https://%s:2380", etcdHost.InternalAddress)
+// AddEtcdMember registers toAddEtcdHost as a new member of the etcd cluster.
+// It asks each of the other hosts in etcdHosts in turn and stops at the first
+// one that accepts the request; an error is returned if none of them does.
+func AddEtcdMember(ctx context.Context, toAddEtcdHost *hosts.Host, etcdHosts []*hosts.Host, localConnDialerFactory hosts.DialerFactory, cert, key []byte) error {
+	log.Infof(ctx, "[add/%s] Adding member [etcd-%s] to etcd cluster", ETCDRole, toAddEtcdHost.HostnameOverride)
+	peerURL := fmt.Sprintf("https://%s:2380", toAddEtcdHost.InternalAddress)
 	added := false
 	for _, host := range etcdHosts {
+		if host.Address == toAddEtcdHost.Address {
+			continue
+		}
 		etcdClient, err := getEtcdClient(ctx, host, localConnDialerFactory, cert, key)
 		if err != nil {
 			logrus.Debugf("Failed to create etcd client for host [%s]: %v", host.Address, err)
@@ -71,16 +79,16 @@ func AddEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*hosts
 		}
 		memAPI := etcdclient.NewMembersAPI(etcdClient)
 		if _, err := memAPI.Add(ctx, peerURL); err != nil {
-			logrus.Debugf("Failed to list etcd members from host [%s]: %v", host.Address, err)
+			logrus.Debugf("Failed to add etcd member [etcd-%s] from host [%s]: %v", toAddEtcdHost.HostnameOverride, host.Address, err)
 			continue
 		}
 		added = true
 		break
 	}
 	if !added {
-		return fmt.Errorf("Failed to add etcd member [etcd-%s] from etcd cluster", etcdHost.HostnameOverride)
+		return fmt.Errorf("Failed to add etcd member [etcd-%s] to etcd cluster", toAddEtcdHost.HostnameOverride)
 	}
-	log.Infof(ctx, "[add/%s] Successfully Added member [etcd-%s] to etcd cluster", ETCDRole, etcdHost.HostnameOverride)
+	log.Infof(ctx, "[add/%s] Successfully Added member [etcd-%s] to etcd cluster", ETCDRole, toAddEtcdHost.HostnameOverride)
 	return nil
 }
 
@@ -140,3 +148,46 @@ func ReloadEtcdCluster(ctx context.Context, readyEtcdHosts []*hosts.Host, localC
 	}
 	return nil
 }
+
+// IsEtcdMember reports whether etcdHost is already registered as a member of
+// the etcd cluster. The other hosts in etcdHosts are queried one at a time and
+// the first member list that can be retrieved decides the answer, so hosts
+// that are unreachable do not fail the check as long as one host responds.
+// An error is returned only when no host could be queried successfully.
+func IsEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*hosts.Host, localConnDialerFactory hosts.DialerFactory, cert, key []byte) (bool, error) {
+	var listErr error
+	peerURL := fmt.Sprintf("https://%s:2380", etcdHost.InternalAddress)
+	for _, host := range etcdHosts {
+		if host.Address == etcdHost.Address {
+			continue
+		}
+		etcdClient, err := getEtcdClient(ctx, host, localConnDialerFactory, cert, key)
+		if err != nil {
+			listErr = errors.Wrapf(err, "Failed to create etcd client for host [%s]", host.Address)
+			logrus.Debugf("Failed to create etcd client for host [%s]: %v", host.Address, err)
+			continue
+		}
+		memAPI := etcdclient.NewMembersAPI(etcdClient)
+		members, err := memAPI.List(ctx)
+		if err != nil {
+			listErr = errors.Wrapf(err, "Failed to list etcd members from host [%s]", host.Address)
+			logrus.Debugf("Failed to list etcd members from host [%s]: %v", host.Address, err)
+			continue
+		}
+		for _, member := range members {
+			// Check every advertised peer URL; a member may have none or several.
+			for _, memberURL := range member.PeerURLs {
+				if strings.Contains(memberURL, peerURL) {
+					logrus.Infof("[etcd] member [%s] is already part of the etcd cluster", etcdHost.Address)
+					return true, nil
+				}
+			}
+		}
+		// This host returned a valid member list without etcdHost in it, so
+		// failures seen on previously tried hosts no longer matter.
+		return false, nil
+	}
+	// Either there was no other host to ask (listErr is nil) or every
+	// attempt failed, in which case the last failure is reported.
+	return false, listErr
+}