rancher/rke (mirror of https://github.com/rancher/rke.git)

Automatically sync local backups

commit 285ac8d81c, parent fa332f7e07
committed by Alena Prokharchyk
@@ -3,9 +3,17 @@ package cluster

import (
	"context"
	"fmt"
	"strings"

	"github.com/rancher/rke/docker"
	"github.com/rancher/rke/hosts"
	"github.com/rancher/rke/log"
	"github.com/rancher/rke/services"
	"github.com/rancher/rke/util"
)

const (
	SupportedSyncToolsVersion = "0.1.22"
)

func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
@@ -17,20 +25,57 @@ func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
	return nil
}

-func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) error {
-	// get etcd snapshots from s3 if backup backend server is set
+func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error {
+	// local backup case
	var backupServer *hosts.Host
	// stop etcd on all etcd nodes; we need this because the backup server is started on the same port
	if !isAutoSyncSupported(c.SystemImages.Alpine) {
		log.Warnf(ctx, "Auto local backup sync is not supported. Use `rancher/rke-tools:%s` or newer", SupportedSyncToolsVersion)
	} else if c.Services.Etcd.BackupConfig == nil || // legacy rke local backup
		(c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig == nil) { // rancher local backup, no s3
		for _, host := range c.EtcdHosts {
			if err := docker.StopContainer(ctx, host.DClient, host.Address, services.EtcdContainerName); err != nil {
				log.Warnf(ctx, "failed to stop etcd container on host [%s]: %v", host.Address, err)
			}
			if backupServer == nil { // start the download server; only one node should have it
				if err := services.StartBackupServer(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath); err != nil {
					log.Warnf(ctx, "failed to start backup server on host [%s]: %v", host.Address, err)
					continue
				}
				backupServer = host
			}
		}
		if backupServer == nil {
			return fmt.Errorf("failed to start backup server on any etcd host")
		}
		// start downloading the snapshot
		for _, host := range c.EtcdHosts {
			if host.Address == backupServer.Address { // skip the backup server itself
				continue
			}
			if err := services.DownloadEtcdSnapshotFromBackupServer(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, backupServer); err != nil {
				return err
			}
		}
		// all good, remove the backup server container
		if err := docker.DoRemoveContainer(ctx, backupServer.DClient, services.EtcdServeBackupContainerName, backupServer.Address); err != nil {
			return err
		}
	}

	// s3 backup case
	if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
		for _, host := range c.EtcdHosts {
-			if err := services.DownloadEtcdSnapshot(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, c.Services.Etcd); err != nil {
+			if err := services.DownloadEtcdSnapshotFromS3(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, c.Services.Etcd); err != nil {
				return err
			}
		}
	}

	// this applies to all cases!
	if isEqual := c.etcdSnapshotChecksum(ctx, snapshotPath); !isEqual {
		return fmt.Errorf("etcd snapshots are not consistent")
	}

	return nil
}

func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) error {
	// Start restore process on all etcd hosts
	initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
	for _, host := range c.EtcdHosts {
@@ -60,3 +105,22 @@ func (c *Cluster) etcdSnapshotChecksum(ctx context.Context, snapshotPath string)
	}
	return true
}

func isAutoSyncSupported(image string) bool {
	v := strings.Split(image, ":")
	last := v[len(v)-1]

	sv, err := util.StrToSemVer(last)
	if err != nil {
		return false
	}

	supported, err := util.StrToSemVer(SupportedSyncToolsVersion)
	if err != nil {
		return false
	}
	if sv.LessThan(*supported) {
		return false
	}
	return true
}
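Note on the version gate: isAutoSyncSupported only looks at the image tag, so non-semver tags (for example "latest") fail the parse and disable auto sync. A standalone sketch of the same check, assuming util.StrToSemVer wraps github.com/coreos/go-semver (an assumption for illustration; the LessThan usage above matches that library's signature):

package main

import (
	"fmt"
	"strings"

	"github.com/coreos/go-semver/semver"
)

// tagAtLeast reports whether the image's tag parses as semver and is >= min.
func tagAtLeast(image, min string) bool {
	parts := strings.Split(image, ":")
	tag := parts[len(parts)-1] // "rancher/rke-tools:0.1.22" -> "0.1.22"

	v, err := semver.NewVersion(tag)
	if err != nil {
		return false // non-semver tags count as unsupported
	}
	m, err := semver.NewVersion(min)
	if err != nil {
		return false
	}
	return !v.LessThan(*m)
}

func main() {
	fmt.Println(tagAtLeast("rancher/rke-tools:0.1.22", "0.1.22")) // true
	fmt.Println(tagAtLeast("rancher/rke-tools:0.1.16", "0.1.22")) // false
}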
@@ -130,7 +130,10 @@ func RestoreEtcdSnapshot(
	if err := kubeCluster.TunnelHosts(ctx, flags); err != nil {
		return err
	}
+	// first download and check
+	if err := kubeCluster.PrepareBackup(ctx, snapshotName); err != nil {
+		return err
+	}
	log.Infof(ctx, "Cleaning old kubernetes cluster")
	if err := kubeCluster.CleanupNodes(ctx); err != nil {
		return err
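Ordering matters here: PrepareBackup runs before CleanupNodes so that the restore aborts while the old cluster is still intact if any node is missing the snapshot or holds a divergent copy. The requirement it enforces is the one from etcdSnapshotChecksum above; a local illustration of that idea (hypothetical helpers, not the rke implementation, which runs a checksum container per host and compares the outputs):

package main

import (
	"crypto/md5"
	"fmt"
	"io"
	"os"
)

// fileChecksum hashes one snapshot file (md5 chosen only for illustration).
func fileChecksum(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	h := md5.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", h.Sum(nil)), nil
}

// consistent mirrors the "etcd snapshots are not consistent" check: every
// node's checksum must equal the first one.
func consistent(sums []string) bool {
	for _, s := range sums {
		if s != sums[0] {
			return false
		}
	}
	return true
}

func main() {
	var sums []string
	for _, p := range os.Args[1:] { // pass one snapshot path per node
		s, err := fileChecksum(p)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		sums = append(sums, s)
	}
	fmt.Println("consistent:", len(sums) > 0 && consistent(sums))
}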
@@ -335,7 +335,8 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
	return nil
}

-func DownloadEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, es v3.ETCDService) error {
+func DownloadEtcdSnapshotFromS3(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, es v3.ETCDService) error {
	log.Infof(ctx, "[etcd] Get snapshot [%s] on host [%s]", name, etcdHost.Address)
	s3Backend := es.BackupConfig.S3BackupConfig
	if len(s3Backend.Endpoint) == 0 || len(s3Backend.BucketName) == 0 {
@@ -347,6 +348,7 @@ func DownloadEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[
		"etcd-backup",
		"download",
		"--name", name,
+		"--s3-backup=true",
		"--s3-endpoint=" + s3Backend.Endpoint,
		"--s3-accessKey=" + s3Backend.AccessKey,
		"--s3-secretKey=" + s3Backend.SecretKey,
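The flags above configure the S3 client inside rke-tools' etcd-backup binary. As a generic sketch of the download step it performs, assuming an S3-compatible client such as github.com/minio/minio-go (an assumption; endpoint, bucket, and object names below are placeholders, not rke-tools code):

package main

import (
	"context"
	"log"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)

func main() {
	// Mirrors --s3-endpoint / --s3-accessKey / --s3-secretKey.
	client, err := minio.New("s3.example.com", &minio.Options{
		Creds:  credentials.NewStaticV4("ACCESS_KEY", "SECRET_KEY", ""),
		Secure: true,
	})
	if err != nil {
		log.Fatal(err)
	}
	// Fetch the named snapshot object into the local backup directory,
	// the same place a local backup server would serve it from.
	err = client.FGetObject(context.Background(),
		"etcd-backups", "snapshot-name", "/backup/snapshot-name",
		minio.GetObjectOptions{})
	if err != nil {
		log.Fatal(err)
	}
}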
@@ -490,3 +492,69 @@ func configS3BackupImgCmd(ctx context.Context, imageCfg *container.Config, bc *v
	imageCfg.Cmd = append(imageCfg.Cmd, cmd...)
	return imageCfg
}

func StartBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string) error {
	log.Infof(ctx, "[etcd] starting backup server on host [%s]", etcdHost.Address)

	imageCfg := &container.Config{
		Cmd: []string{
			"/opt/rke-tools/rke-etcd-backup",
			"etcd-backup",
			"serve",
			"--name", name,
			"--cacert", pki.GetCertPath(pki.CACertName),
			"--cert", pki.GetCertPath(pki.KubeNodeCertName),
			"--key", pki.GetKeyPath(pki.KubeNodeCertName),
		},
		Image: etcdSnapshotImage,
	}
	hostCfg := &container.HostConfig{
		Binds: []string{
			fmt.Sprintf("%s:/backup", EtcdSnapshotPath),
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
		NetworkMode:   container.NetworkMode("host"),
		RestartPolicy: container.RestartPolicy{Name: "on-failure"},
	}
	return docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdServeBackupContainerName, etcdHost.Address, ETCDRole, prsMap)
}

func DownloadEtcdSnapshotFromBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage, name string, backupServer *hosts.Host) error {
	log.Infof(ctx, "[etcd] Get snapshot [%s] on host [%s]", name, etcdHost.Address)
	imageCfg := &container.Config{
		Cmd: []string{
			"/opt/rke-tools/rke-etcd-backup",
			"etcd-backup",
			"download",
			"--name", name,
			"--local-endpoint", backupServer.Address,
			"--cacert", pki.GetCertPath(pki.CACertName),
			"--cert", pki.GetCertPath(pki.KubeNodeCertName),
			"--key", pki.GetKeyPath(pki.KubeNodeCertName),
		},
		Image: etcdSnapshotImage,
	}

	hostCfg := &container.HostConfig{
		Binds: []string{
			fmt.Sprintf("%s:/backup", EtcdSnapshotPath),
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
		NetworkMode:   container.NetworkMode("host"),
		RestartPolicy: container.RestartPolicy{Name: "on-failure"},
	}

	if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdDownloadBackupContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
		return err
	}

	status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address)
	if status != 0 || err != nil {
		if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
			log.Warnf(ctx, "Failed to remove container [%s]: %v", EtcdDownloadBackupContainerName, removeErr)
		}
		if err != nil {
			return err
		}
		return fmt.Errorf("Failed to download etcd snapshot from backup server [%s], exit code [%d]: %v", backupServer.Address, status, stderr)
	}
	return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName)
}
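The serve and download subcommands themselves live in rancher/rke-tools, not in this repository. Given the --cacert/--cert/--key flags above and the comment that etcd must be stopped because the backup server reuses its port, the serving side plausibly amounts to an HTTPS file server over /backup with mutual TLS, so only nodes holding a cluster-signed certificate can fetch the snapshot. A rough standalone sketch of that shape (the port and file paths are assumptions, not the rke-tools implementation):

package main

import (
	"crypto/tls"
	"crypto/x509"
	"log"
	"net/http"
	"os"
)

func main() {
	// Trust only clients whose certs are signed by the cluster CA (--cacert).
	caPEM, err := os.ReadFile("/etc/kubernetes/ssl/kube-ca.pem")
	if err != nil {
		log.Fatal(err)
	}
	pool := x509.NewCertPool()
	pool.AppendCertsFromPEM(caPEM)

	srv := &http.Server{
		Addr:    ":2379", // assumption: reuses etcd's client port, hence etcd is stopped first
		Handler: http.FileServer(http.Dir("/backup")),
		TLSConfig: &tls.Config{
			ClientAuth: tls.RequireAndVerifyClientCert,
			ClientCAs:  pool,
		},
	}
	// Serve with the node certificate and key (--cert / --key).
	log.Fatal(srv.ListenAndServeTLS(
		"/etc/kubernetes/ssl/kube-node.pem",
		"/etc/kubernetes/ssl/kube-node-key.pem",
	))
}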
@@ -32,6 +32,7 @@ const (
	EtcdSnapshotOnceContainerName   = "etcd-snapshot-once"
	EtcdRestoreContainerName        = "etcd-restore"
	EtcdDownloadBackupContainerName = "etcd-download-backup"
+	EtcdServeBackupContainerName    = "etcd-serve-backup"
	EtcdChecksumContainerName       = "etcd-checksum-checker"
	NginxProxyContainerName         = "nginx-proxy"
	SidekickContainerName           = "service-sidekick"