mirror of https://github.com/rancher/rke.git
Restore backup to a new etcd plane
parent bfbdf33820
commit e3d6fb4db9
@@ -17,11 +17,11 @@ import (
     "github.com/rancher/rke/pki"
     "github.com/rancher/rke/services"
     "github.com/rancher/rke/util"
-    "github.com/rancher/types/apis/management.cattle.io/v3"
+    v3 "github.com/rancher/types/apis/management.cattle.io/v3"
     "github.com/sirupsen/logrus"
     "golang.org/x/sync/errgroup"
     "gopkg.in/yaml.v2"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     "k8s.io/client-go/kubernetes"
     "k8s.io/client-go/tools/clientcmd"
     "k8s.io/client-go/util/cert"
@@ -338,7 +338,7 @@ func (c *Cluster) deployAddons(ctx context.Context) error {
 func (c *Cluster) SyncLabelsAndTaints(ctx context.Context, currentCluster *Cluster) error {
     // Handle issue when deleting all controlplane nodes https://github.com/rancher/rancher/issues/15810
     if currentCluster != nil {
-        cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, c.ControlPlaneHosts, c.InactiveHosts)
+        cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, c.ControlPlaneHosts, c.InactiveHosts, false)
         if len(cpToDelete) == len(currentCluster.ControlPlaneHosts) {
             log.Infof(ctx, "[sync] Cleaning left control plane nodes from reconcilation")
             for _, toDeleteHost := range cpToDelete {
@@ -185,10 +185,11 @@ func removeFromHosts(hostToRemove *hosts.Host, hostList []*hosts.Host) []*hosts.
 }
 
 func removeFromRKENodes(nodeToRemove v3.RKEConfigNode, nodeList []v3.RKEConfigNode) []v3.RKEConfigNode {
-    for i := range nodeList {
-        if nodeToRemove.Address == nodeList[i].Address {
-            return append(nodeList[:i], nodeList[i+1:]...)
+    l := []v3.RKEConfigNode{}
+    for _, node := range nodeList {
+        if nodeToRemove.Address != node.Address {
+            l = append(l, node)
         }
     }
-    return nodeList
+    return l
 }
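The rewritten removeFromRKENodes collects the surviving nodes into a fresh slice instead of splicing with append(nodeList[:i], nodeList[i+1:]...), presumably to avoid rewriting elements in the caller's backing array. A minimal standalone sketch of that difference, with plain strings standing in for v3.RKEConfigNode (illustrative only, not repository code):

package main

import "fmt"

// removeInPlace drops x the way the old code did: append into the prefix of
// the same slice, which shifts elements inside the caller's backing array.
func removeInPlace(list []string, x string) []string {
    for i := range list {
        if list[i] == x {
            return append(list[:i], list[i+1:]...)
        }
    }
    return list
}

// removeByCopy drops x the way the new code does: copy survivors into a
// fresh slice and leave the input untouched.
func removeByCopy(list []string, x string) []string {
    out := []string{}
    for _, v := range list {
        if v != x {
            out = append(out, v)
        }
    }
    return out
}

func main() {
    a := []string{"n1", "n2", "n3"}
    fmt.Println(removeInPlace(a, "n1"), a) // [n2 n3] [n2 n3 n3] -- caller's slice was rewritten

    b := []string{"n1", "n2", "n3"}
    fmt.Println(removeByCopy(b, "n1"), b) // [n2 n3] [n1 n2 n3] -- caller's slice intact
}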
@@ -11,7 +11,7 @@ import (
     "github.com/rancher/rke/log"
     "github.com/rancher/rke/pki"
     "github.com/rancher/rke/services"
-    "github.com/rancher/types/apis/management.cattle.io/v3"
+    v3 "github.com/rancher/types/apis/management.cattle.io/v3"
     "github.com/sirupsen/logrus"
     "k8s.io/client-go/kubernetes"
     "k8s.io/client-go/util/cert"
@@ -20,6 +20,8 @@ import (
 const (
     unschedulableEtcdTaint    = "node-role.kubernetes.io/etcd=true:NoExecute"
     unschedulableControlTaint = "node-role.kubernetes.io/controlplane=true:NoSchedule"
+
+    EtcdPlaneNodesReplacedErr = "Etcd plane nodes are replaced. Stopping provisioning. Please restore your cluster from backup."
 )
 
 func ReconcileCluster(ctx context.Context, kubeCluster, currentCluster *Cluster, flags ExternalFlags) error {
@@ -62,7 +64,7 @@ func ReconcileCluster(ctx context.Context, kubeCluster, currentCluster *Cluster,
 func reconcileWorker(ctx context.Context, currentCluster, kubeCluster *Cluster, kubeClient *kubernetes.Clientset) error {
     // worker deleted first to avoid issues when worker+controller on same host
     logrus.Debugf("[reconcile] Check worker hosts to be deleted")
-    wpToDelete := hosts.GetToDeleteHosts(currentCluster.WorkerHosts, kubeCluster.WorkerHosts, kubeCluster.InactiveHosts)
+    wpToDelete := hosts.GetToDeleteHosts(currentCluster.WorkerHosts, kubeCluster.WorkerHosts, kubeCluster.InactiveHosts, false)
     for _, toDeleteHost := range wpToDelete {
         toDeleteHost.IsWorker = false
         if err := hosts.DeleteNode(ctx, toDeleteHost, kubeClient, toDeleteHost.IsControl, kubeCluster.CloudProvider.Name); err != nil {
@@ -94,7 +96,7 @@ func reconcileControl(ctx context.Context, currentCluster, kubeCluster *Cluster,
     if err != nil {
         return err
     }
-    cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, kubeCluster.ControlPlaneHosts, kubeCluster.InactiveHosts)
+    cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, kubeCluster.ControlPlaneHosts, kubeCluster.InactiveHosts, false)
     // move the current host in local kubeconfig to the end of the list
     for i, toDeleteHost := range cpToDelete {
         if toDeleteHost.Address == selfDeleteAddress {
@@ -165,11 +167,15 @@ func reconcileHost(ctx context.Context, toDeleteHost *hosts.Host, worker, etcd b
 
 func reconcileEtcd(ctx context.Context, currentCluster, kubeCluster *Cluster, kubeClient *kubernetes.Clientset) error {
     log.Infof(ctx, "[reconcile] Check etcd hosts to be deleted")
+    if isEtcdPlaneReplaced(ctx, currentCluster, kubeCluster) {
+        logrus.Warnf("%v", EtcdPlaneNodesReplacedErr)
+        return fmt.Errorf("%v", EtcdPlaneNodesReplacedErr)
+    }
     // get tls for the first current etcd host
     clientCert := cert.EncodeCertPEM(currentCluster.Certificates[pki.KubeNodeCertName].Certificate)
     clientkey := cert.EncodePrivateKeyPEM(currentCluster.Certificates[pki.KubeNodeCertName].Key)
 
-    etcdToDelete := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts, kubeCluster.InactiveHosts)
+    etcdToDelete := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts, kubeCluster.InactiveHosts, false)
     for _, etcdHost := range etcdToDelete {
         if err := services.RemoveEtcdMember(ctx, etcdHost, kubeCluster.EtcdHosts, currentCluster.LocalConnDialerFactory, clientCert, clientkey); err != nil {
             log.Warnf(ctx, "[reconcile] %v", err)
@@ -335,3 +341,16 @@ func checkCertificateChanges(ctx context.Context, currentCluster, kubeCluster *C
         }
     }
 }
+
+func isEtcdPlaneReplaced(ctx context.Context, currentCluster, kubeCluster *Cluster) bool {
+    etcdToDeleteInactive := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts, kubeCluster.InactiveHosts, true)
+    // old etcd nodes are down, we added new ones
+    if len(etcdToDeleteInactive) == len(currentCluster.EtcdHosts) {
+        return true
+    }
+    // one or more etcd nodes are removed from cluster.yaml and replaced
+    if len(hosts.GetHostListIntersect(kubeCluster.EtcdHosts, currentCluster.EtcdHosts)) == 0 {
+        return true
+    }
+    return false
+}
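isEtcdPlaneReplaced makes reconcileEtcd refuse to continue when none of the etcd nodes recorded in the cluster state survive in the requested configuration, either because the old members are all down and replaced, or because cluster.yaml now lists an entirely new set of addresses. A rough standalone sketch of the address-intersection idea, with plain strings standing in for *hosts.Host (illustrative assumption, not repository code):

package main

import "fmt"

// intersectAddresses mirrors the shape of GetHostListIntersect over plain
// address strings: keep the entries of b whose address also appears in a.
func intersectAddresses(a, b []string) []string {
    seen := map[string]bool{}
    for _, addr := range a {
        seen[addr] = true
    }
    out := []string{}
    for _, addr := range b {
        if seen[addr] {
            out = append(out, addr)
        }
    }
    return out
}

func main() {
    currentEtcd := []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"} // from cluster state

    // cluster.yaml now lists a completely different etcd plane: no overlap,
    // so provisioning stops and a snapshot restore is required instead.
    replaced := []string{"10.0.1.1", "10.0.1.2", "10.0.1.3"}
    fmt.Println(len(intersectAddresses(replaced, currentEtcd)) == 0) // true

    // at least one surviving member means the member list can still be reconciled
    partial := []string{"10.0.0.1", "10.0.1.2", "10.0.1.3"}
    fmt.Println(len(intersectAddresses(partial, currentEtcd)) == 0) // false
}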
@@ -9,7 +9,7 @@ import (
     "github.com/rancher/rke/pki"
     "github.com/rancher/rke/services"
     "github.com/rancher/rke/util"
-    "github.com/rancher/types/apis/management.cattle.io/v3"
+    v3 "github.com/rancher/types/apis/management.cattle.io/v3"
     "golang.org/x/sync/errgroup"
 )
 
@@ -85,7 +85,8 @@ func (c *Cluster) RemoveOldNodes(ctx context.Context) error {
     }
     uniqueHosts := hosts.GetUniqueHostList(c.EtcdHosts, c.ControlPlaneHosts, c.WorkerHosts)
     for _, node := range nodeList.Items {
-        if k8s.IsNodeReady(node) {
+        _, isEtcd := node.Labels[etcdRoleLabel]
+        if k8s.IsNodeReady(node) && !isEtcd {
             continue
         }
         host := &hosts.Host{}
@@ -158,7 +158,7 @@ func RestoreEtcdSnapshot(
     }
 
     if err := cluster.RestartClusterPods(ctx, kubeCluster); err != nil {
-        return nil
+        return err
     }
     if err := kubeCluster.RemoveOldNodes(ctx); err != nil {
         return err
@@ -11,7 +11,7 @@ import (
     "github.com/rancher/rke/hosts"
     "github.com/rancher/rke/log"
     "github.com/rancher/rke/pki"
-    "github.com/rancher/types/apis/management.cattle.io/v3"
+    v3 "github.com/rancher/types/apis/management.cattle.io/v3"
     "github.com/urfave/cli"
     "k8s.io/client-go/util/cert"
 )
@@ -97,7 +97,6 @@ func ClusterUp(ctx context.Context, dialersOptions hosts.DialersOptions, flags c
     if err != nil {
         return APIURL, caCrt, clientCert, clientKey, nil, err
     }
-
     err = kubeCluster.TunnelHosts(ctx, flags)
     if err != nil {
         return APIURL, caCrt, clientCert, clientKey, nil, err
@@ -14,7 +14,7 @@ import (
     "github.com/rancher/rke/docker"
     "github.com/rancher/rke/k8s"
     "github.com/rancher/rke/log"
-    "github.com/rancher/types/apis/management.cattle.io/v3"
+    v3 "github.com/rancher/types/apis/management.cattle.io/v3"
     apierrors "k8s.io/apimachinery/pkg/api/errors"
     "k8s.io/client-go/kubernetes"
 )
@@ -179,7 +179,7 @@ func RemoveTaintFromHost(ctx context.Context, host *Host, taintKey string, kubeC
     return nil
 }
 
-func GetToDeleteHosts(currentHosts, configHosts, inactiveHosts []*Host) []*Host {
+func GetToDeleteHosts(currentHosts, configHosts, inactiveHosts []*Host, includeInactive bool) []*Host {
     toDeleteHosts := []*Host{}
     for _, currentHost := range currentHosts {
         found := false
@@ -196,7 +196,7 @@ func GetToDeleteHosts(currentHosts, configHosts, inactiveHosts []*Host) []*Host
                     break
                 }
             }
-            if !inactive {
+            if (inactive && includeInactive) || !inactive {
                 toDeleteHosts = append(toDeleteHosts, currentHost)
             }
         }
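GetToDeleteHosts still returns the current hosts that no longer appear in the requested config, but the new includeInactive flag controls whether unreachable hosts are reported as well: the reconcile and sync paths pass false (unchanged behaviour), while isEtcdPlaneReplaced passes true so that downed etcd nodes still count as removed. A trimmed-down, hypothetical re-implementation over a stub host type, just to illustrate the flag (not the repository code):

package main

import "fmt"

type fakeHost struct {
    Address string
}

// toDelete mirrors the reshaped GetToDeleteHosts over a stub host type:
// current hosts absent from the config are deletion candidates, and the
// inactive ones are included only when includeInactive is set.
func toDelete(current, config, inactive []fakeHost, includeInactive bool) []fakeHost {
    inConfig := map[string]bool{}
    for _, h := range config {
        inConfig[h.Address] = true
    }
    isInactive := map[string]bool{}
    for _, h := range inactive {
        isInactive[h.Address] = true
    }
    out := []fakeHost{}
    for _, h := range current {
        if inConfig[h.Address] {
            continue
        }
        if (isInactive[h.Address] && includeInactive) || !isInactive[h.Address] {
            out = append(out, h)
        }
    }
    return out
}

func main() {
    current := []fakeHost{{"10.0.0.1"}, {"10.0.0.2"}}
    config := []fakeHost{{"10.0.0.2"}}
    inactive := []fakeHost{{"10.0.0.1"}} // down and no longer in cluster.yaml

    fmt.Println(toDelete(current, config, inactive, false)) // []           -- reconcile skips unreachable hosts
    fmt.Println(toDelete(current, config, inactive, true))  // [{10.0.0.1}] -- replacement detection counts them
}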
@@ -354,3 +354,17 @@ func IsNodeInList(host *Host, hostList []*Host) bool {
     }
     return false
 }
+
+func GetHostListIntersect(a []*Host, b []*Host) []*Host {
+    s := []*Host{}
+    hash := map[string]*Host{}
+    for _, h := range a {
+        hash[h.Address] = h
+    }
+    for _, h := range b {
+        if _, ok := hash[h.Address]; ok {
+            s = append(s, h)
+        }
+    }
+    return s
+}
@@ -13,7 +13,7 @@ import (
     "github.com/rancher/rke/docker"
     "github.com/rancher/rke/hosts"
     "github.com/rancher/rke/log"
-    "github.com/rancher/types/apis/management.cattle.io/v3"
+    v3 "github.com/rancher/types/apis/management.cattle.io/v3"
     "github.com/sirupsen/logrus"
 )
 