mirror of
https://github.com/rancher/rke.git
synced 2025-08-09 18:58:38 +00:00
Restore backup to a new etcd plane
This commit is contained in:
parent
bfbdf33820
commit
e3d6fb4db9
@ -17,11 +17,11 @@ import (
|
|||||||
"github.com/rancher/rke/pki"
|
"github.com/rancher/rke/pki"
|
||||||
"github.com/rancher/rke/services"
|
"github.com/rancher/rke/services"
|
||||||
"github.com/rancher/rke/util"
|
"github.com/rancher/rke/util"
|
||||||
"github.com/rancher/types/apis/management.cattle.io/v3"
|
v3 "github.com/rancher/types/apis/management.cattle.io/v3"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
"k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/tools/clientcmd"
|
"k8s.io/client-go/tools/clientcmd"
|
||||||
"k8s.io/client-go/util/cert"
|
"k8s.io/client-go/util/cert"
|
||||||
@ -338,7 +338,7 @@ func (c *Cluster) deployAddons(ctx context.Context) error {
|
|||||||
func (c *Cluster) SyncLabelsAndTaints(ctx context.Context, currentCluster *Cluster) error {
|
func (c *Cluster) SyncLabelsAndTaints(ctx context.Context, currentCluster *Cluster) error {
|
||||||
// Handle issue when deleting all controlplane nodes https://github.com/rancher/rancher/issues/15810
|
// Handle issue when deleting all controlplane nodes https://github.com/rancher/rancher/issues/15810
|
||||||
if currentCluster != nil {
|
if currentCluster != nil {
|
||||||
cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, c.ControlPlaneHosts, c.InactiveHosts)
|
cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, c.ControlPlaneHosts, c.InactiveHosts, false)
|
||||||
if len(cpToDelete) == len(currentCluster.ControlPlaneHosts) {
|
if len(cpToDelete) == len(currentCluster.ControlPlaneHosts) {
|
||||||
log.Infof(ctx, "[sync] Cleaning left control plane nodes from reconcilation")
|
log.Infof(ctx, "[sync] Cleaning left control plane nodes from reconcilation")
|
||||||
for _, toDeleteHost := range cpToDelete {
|
for _, toDeleteHost := range cpToDelete {
|
||||||
|
@ -185,10 +185,11 @@ func removeFromHosts(hostToRemove *hosts.Host, hostList []*hosts.Host) []*hosts.
|
|||||||
}
|
}
|
||||||
|
|
||||||
func removeFromRKENodes(nodeToRemove v3.RKEConfigNode, nodeList []v3.RKEConfigNode) []v3.RKEConfigNode {
|
func removeFromRKENodes(nodeToRemove v3.RKEConfigNode, nodeList []v3.RKEConfigNode) []v3.RKEConfigNode {
|
||||||
for i := range nodeList {
|
l := []v3.RKEConfigNode{}
|
||||||
if nodeToRemove.Address == nodeList[i].Address {
|
for _, node := range nodeList {
|
||||||
return append(nodeList[:i], nodeList[i+1:]...)
|
if nodeToRemove.Address != node.Address {
|
||||||
|
l = append(l, node)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nodeList
|
return l
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,7 @@ import (
|
|||||||
"github.com/rancher/rke/log"
|
"github.com/rancher/rke/log"
|
||||||
"github.com/rancher/rke/pki"
|
"github.com/rancher/rke/pki"
|
||||||
"github.com/rancher/rke/services"
|
"github.com/rancher/rke/services"
|
||||||
"github.com/rancher/types/apis/management.cattle.io/v3"
|
v3 "github.com/rancher/types/apis/management.cattle.io/v3"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/util/cert"
|
"k8s.io/client-go/util/cert"
|
||||||
@ -20,6 +20,8 @@ import (
|
|||||||
const (
|
const (
|
||||||
unschedulableEtcdTaint = "node-role.kubernetes.io/etcd=true:NoExecute"
|
unschedulableEtcdTaint = "node-role.kubernetes.io/etcd=true:NoExecute"
|
||||||
unschedulableControlTaint = "node-role.kubernetes.io/controlplane=true:NoSchedule"
|
unschedulableControlTaint = "node-role.kubernetes.io/controlplane=true:NoSchedule"
|
||||||
|
|
||||||
|
EtcdPlaneNodesReplacedErr = "Etcd plane nodes are replaced. Stopping provisioning. Please restore your cluster from backup."
|
||||||
)
|
)
|
||||||
|
|
||||||
func ReconcileCluster(ctx context.Context, kubeCluster, currentCluster *Cluster, flags ExternalFlags) error {
|
func ReconcileCluster(ctx context.Context, kubeCluster, currentCluster *Cluster, flags ExternalFlags) error {
|
||||||
@ -62,7 +64,7 @@ func ReconcileCluster(ctx context.Context, kubeCluster, currentCluster *Cluster,
|
|||||||
func reconcileWorker(ctx context.Context, currentCluster, kubeCluster *Cluster, kubeClient *kubernetes.Clientset) error {
|
func reconcileWorker(ctx context.Context, currentCluster, kubeCluster *Cluster, kubeClient *kubernetes.Clientset) error {
|
||||||
// worker deleted first to avoid issues when worker+controller on same host
|
// worker deleted first to avoid issues when worker+controller on same host
|
||||||
logrus.Debugf("[reconcile] Check worker hosts to be deleted")
|
logrus.Debugf("[reconcile] Check worker hosts to be deleted")
|
||||||
wpToDelete := hosts.GetToDeleteHosts(currentCluster.WorkerHosts, kubeCluster.WorkerHosts, kubeCluster.InactiveHosts)
|
wpToDelete := hosts.GetToDeleteHosts(currentCluster.WorkerHosts, kubeCluster.WorkerHosts, kubeCluster.InactiveHosts, false)
|
||||||
for _, toDeleteHost := range wpToDelete {
|
for _, toDeleteHost := range wpToDelete {
|
||||||
toDeleteHost.IsWorker = false
|
toDeleteHost.IsWorker = false
|
||||||
if err := hosts.DeleteNode(ctx, toDeleteHost, kubeClient, toDeleteHost.IsControl, kubeCluster.CloudProvider.Name); err != nil {
|
if err := hosts.DeleteNode(ctx, toDeleteHost, kubeClient, toDeleteHost.IsControl, kubeCluster.CloudProvider.Name); err != nil {
|
||||||
@ -94,7 +96,7 @@ func reconcileControl(ctx context.Context, currentCluster, kubeCluster *Cluster,
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, kubeCluster.ControlPlaneHosts, kubeCluster.InactiveHosts)
|
cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, kubeCluster.ControlPlaneHosts, kubeCluster.InactiveHosts, false)
|
||||||
// move the current host in local kubeconfig to the end of the list
|
// move the current host in local kubeconfig to the end of the list
|
||||||
for i, toDeleteHost := range cpToDelete {
|
for i, toDeleteHost := range cpToDelete {
|
||||||
if toDeleteHost.Address == selfDeleteAddress {
|
if toDeleteHost.Address == selfDeleteAddress {
|
||||||
@ -165,11 +167,15 @@ func reconcileHost(ctx context.Context, toDeleteHost *hosts.Host, worker, etcd b
|
|||||||
|
|
||||||
func reconcileEtcd(ctx context.Context, currentCluster, kubeCluster *Cluster, kubeClient *kubernetes.Clientset) error {
|
func reconcileEtcd(ctx context.Context, currentCluster, kubeCluster *Cluster, kubeClient *kubernetes.Clientset) error {
|
||||||
log.Infof(ctx, "[reconcile] Check etcd hosts to be deleted")
|
log.Infof(ctx, "[reconcile] Check etcd hosts to be deleted")
|
||||||
|
if isEtcdPlaneReplaced(ctx, currentCluster, kubeCluster) {
|
||||||
|
logrus.Warnf("%v", EtcdPlaneNodesReplacedErr)
|
||||||
|
return fmt.Errorf("%v", EtcdPlaneNodesReplacedErr)
|
||||||
|
}
|
||||||
// get tls for the first current etcd host
|
// get tls for the first current etcd host
|
||||||
clientCert := cert.EncodeCertPEM(currentCluster.Certificates[pki.KubeNodeCertName].Certificate)
|
clientCert := cert.EncodeCertPEM(currentCluster.Certificates[pki.KubeNodeCertName].Certificate)
|
||||||
clientkey := cert.EncodePrivateKeyPEM(currentCluster.Certificates[pki.KubeNodeCertName].Key)
|
clientkey := cert.EncodePrivateKeyPEM(currentCluster.Certificates[pki.KubeNodeCertName].Key)
|
||||||
|
|
||||||
etcdToDelete := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts, kubeCluster.InactiveHosts)
|
etcdToDelete := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts, kubeCluster.InactiveHosts, false)
|
||||||
for _, etcdHost := range etcdToDelete {
|
for _, etcdHost := range etcdToDelete {
|
||||||
if err := services.RemoveEtcdMember(ctx, etcdHost, kubeCluster.EtcdHosts, currentCluster.LocalConnDialerFactory, clientCert, clientkey); err != nil {
|
if err := services.RemoveEtcdMember(ctx, etcdHost, kubeCluster.EtcdHosts, currentCluster.LocalConnDialerFactory, clientCert, clientkey); err != nil {
|
||||||
log.Warnf(ctx, "[reconcile] %v", err)
|
log.Warnf(ctx, "[reconcile] %v", err)
|
||||||
@ -335,3 +341,16 @@ func checkCertificateChanges(ctx context.Context, currentCluster, kubeCluster *C
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isEtcdPlaneReplaced(ctx context.Context, currentCluster, kubeCluster *Cluster) bool {
|
||||||
|
etcdToDeleteInactive := hosts.GetToDeleteHosts(currentCluster.EtcdHosts, kubeCluster.EtcdHosts, kubeCluster.InactiveHosts, true)
|
||||||
|
// old etcd nodes are down, we added new ones
|
||||||
|
if len(etcdToDeleteInactive) == len(currentCluster.EtcdHosts) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// one or more etcd nodes are removed from cluster.yaml and replaced
|
||||||
|
if len(hosts.GetHostListIntersect(kubeCluster.EtcdHosts, currentCluster.EtcdHosts)) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
@ -9,7 +9,7 @@ import (
|
|||||||
"github.com/rancher/rke/pki"
|
"github.com/rancher/rke/pki"
|
||||||
"github.com/rancher/rke/services"
|
"github.com/rancher/rke/services"
|
||||||
"github.com/rancher/rke/util"
|
"github.com/rancher/rke/util"
|
||||||
"github.com/rancher/types/apis/management.cattle.io/v3"
|
v3 "github.com/rancher/types/apis/management.cattle.io/v3"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -85,7 +85,8 @@ func (c *Cluster) RemoveOldNodes(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
uniqueHosts := hosts.GetUniqueHostList(c.EtcdHosts, c.ControlPlaneHosts, c.WorkerHosts)
|
uniqueHosts := hosts.GetUniqueHostList(c.EtcdHosts, c.ControlPlaneHosts, c.WorkerHosts)
|
||||||
for _, node := range nodeList.Items {
|
for _, node := range nodeList.Items {
|
||||||
if k8s.IsNodeReady(node) {
|
_, isEtcd := node.Labels[etcdRoleLabel]
|
||||||
|
if k8s.IsNodeReady(node) && !isEtcd {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
host := &hosts.Host{}
|
host := &hosts.Host{}
|
||||||
|
@ -158,7 +158,7 @@ func RestoreEtcdSnapshot(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err := cluster.RestartClusterPods(ctx, kubeCluster); err != nil {
|
if err := cluster.RestartClusterPods(ctx, kubeCluster); err != nil {
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
if err := kubeCluster.RemoveOldNodes(ctx); err != nil {
|
if err := kubeCluster.RemoveOldNodes(ctx); err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -11,7 +11,7 @@ import (
|
|||||||
"github.com/rancher/rke/hosts"
|
"github.com/rancher/rke/hosts"
|
||||||
"github.com/rancher/rke/log"
|
"github.com/rancher/rke/log"
|
||||||
"github.com/rancher/rke/pki"
|
"github.com/rancher/rke/pki"
|
||||||
"github.com/rancher/types/apis/management.cattle.io/v3"
|
v3 "github.com/rancher/types/apis/management.cattle.io/v3"
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
"k8s.io/client-go/util/cert"
|
"k8s.io/client-go/util/cert"
|
||||||
)
|
)
|
||||||
@ -97,7 +97,6 @@ func ClusterUp(ctx context.Context, dialersOptions hosts.DialersOptions, flags c
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return APIURL, caCrt, clientCert, clientKey, nil, err
|
return APIURL, caCrt, clientCert, clientKey, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = kubeCluster.TunnelHosts(ctx, flags)
|
err = kubeCluster.TunnelHosts(ctx, flags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return APIURL, caCrt, clientCert, clientKey, nil, err
|
return APIURL, caCrt, clientCert, clientKey, nil, err
|
||||||
|
@ -14,7 +14,7 @@ import (
|
|||||||
"github.com/rancher/rke/docker"
|
"github.com/rancher/rke/docker"
|
||||||
"github.com/rancher/rke/k8s"
|
"github.com/rancher/rke/k8s"
|
||||||
"github.com/rancher/rke/log"
|
"github.com/rancher/rke/log"
|
||||||
"github.com/rancher/types/apis/management.cattle.io/v3"
|
v3 "github.com/rancher/types/apis/management.cattle.io/v3"
|
||||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
)
|
)
|
||||||
@ -179,7 +179,7 @@ func RemoveTaintFromHost(ctx context.Context, host *Host, taintKey string, kubeC
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetToDeleteHosts(currentHosts, configHosts, inactiveHosts []*Host) []*Host {
|
func GetToDeleteHosts(currentHosts, configHosts, inactiveHosts []*Host, includeInactive bool) []*Host {
|
||||||
toDeleteHosts := []*Host{}
|
toDeleteHosts := []*Host{}
|
||||||
for _, currentHost := range currentHosts {
|
for _, currentHost := range currentHosts {
|
||||||
found := false
|
found := false
|
||||||
@ -196,7 +196,7 @@ func GetToDeleteHosts(currentHosts, configHosts, inactiveHosts []*Host) []*Host
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !inactive {
|
if (inactive && includeInactive) || !inactive {
|
||||||
toDeleteHosts = append(toDeleteHosts, currentHost)
|
toDeleteHosts = append(toDeleteHosts, currentHost)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -354,3 +354,17 @@ func IsNodeInList(host *Host, hostList []*Host) bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetHostListIntersect(a []*Host, b []*Host) []*Host {
|
||||||
|
s := []*Host{}
|
||||||
|
hash := map[string]*Host{}
|
||||||
|
for _, h := range a {
|
||||||
|
hash[h.Address] = h
|
||||||
|
}
|
||||||
|
for _, h := range b {
|
||||||
|
if _, ok := hash[h.Address]; ok {
|
||||||
|
s = append(s, h)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
@ -13,7 +13,7 @@ import (
|
|||||||
"github.com/rancher/rke/docker"
|
"github.com/rancher/rke/docker"
|
||||||
"github.com/rancher/rke/hosts"
|
"github.com/rancher/rke/hosts"
|
||||||
"github.com/rancher/rke/log"
|
"github.com/rancher/rke/log"
|
||||||
"github.com/rancher/types/apis/management.cattle.io/v3"
|
v3 "github.com/rancher/types/apis/management.cattle.io/v3"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user