diff --git a/cluster/reconcile.go b/cluster/reconcile.go index 8bea2575..946ca995 100644 --- a/cluster/reconcile.go +++ b/cluster/reconcile.go @@ -208,7 +208,8 @@ func reconcileEtcd(ctx context.Context, currentCluster, kubeCluster *Cluster, ku for _, etcdReadyHost := range kubeCluster.EtcdReadyHosts { etcdNodePlanMap[etcdReadyHost.Address] = BuildRKEConfigNodePlan(ctx, kubeCluster, etcdReadyHost, etcdReadyHost.DockerInfo) } - + // this will start the newly added etcd node and make sure it started correctly before restarting other node + // https://github.com/etcd-io/etcd/blob/master/Documentation/op-guide/runtime-configuration.md#add-a-new-member if err := services.ReloadEtcdCluster(ctx, kubeCluster.EtcdReadyHosts, etcdHost, currentCluster.LocalConnDialerFactory, clientCert, clientkey, currentCluster.PrivateRegistriesMap, etcdNodePlanMap, kubeCluster.SystemImages.Alpine); err != nil { return err } diff --git a/services/etcd.go b/services/etcd.go index 3c65f7a7..8d4044c9 100644 --- a/services/etcd.go +++ b/services/etcd.go @@ -23,7 +23,7 @@ const ( EtcdSnapshotPath = "/opt/rke/etcd-snapshots/" EtcdRestorePath = "/opt/rke/etcd-snapshots-restore/" EtcdDataDir = "/var/lib/rancher/etcd/" - EtcdInitWaitTime = 5 + EtcdInitWaitTime = 10 ) type EtcdSnapshot struct { @@ -163,21 +163,6 @@ func RemoveEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*ho } func ReloadEtcdCluster(ctx context.Context, readyEtcdHosts []*hosts.Host, newHost *hosts.Host, localConnDialerFactory hosts.DialerFactory, cert, key []byte, prsMap map[string]v3.PrivateRegistry, etcdNodePlanMap map[string]v3.RKEConfigNodePlan, alpineImage string) error { - // update the old nodes - for _, etcdHost := range readyEtcdHosts { - if etcdHost.Address == newHost.Address { - continue - } - imageCfg, hostCfg, _ := GetProcessConfig(etcdNodePlanMap[etcdHost.Address].Processes[EtcdContainerName]) - if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil { - return err - } - if err := createLogLink(ctx, etcdHost, EtcdContainerName, ETCDRole, alpineImage, prsMap); err != nil { - return err - } - time.Sleep(EtcdInitWaitTime * time.Second) - } - // run the new etcd at last imageCfg, hostCfg, _ := GetProcessConfig(etcdNodePlanMap[newHost.Address].Processes[EtcdContainerName]) if err := docker.DoRunContainer(ctx, newHost.DClient, imageCfg, hostCfg, EtcdContainerName, newHost.Address, ETCDRole, prsMap); err != nil { return err @@ -185,7 +170,7 @@ func ReloadEtcdCluster(ctx context.Context, readyEtcdHosts []*hosts.Host, newHos if err := createLogLink(ctx, newHost, EtcdContainerName, ETCDRole, alpineImage, prsMap); err != nil { return err } - time.Sleep(10 * time.Second) + time.Sleep(EtcdInitWaitTime * time.Second) var healthy bool for _, host := range readyEtcdHosts { _, _, healthCheckURL := GetProcessConfig(etcdNodePlanMap[host.Address].Processes[EtcdContainerName])