1
0
mirror of https://github.com/rancher/rke.git synced 2025-10-21 11:09:12 +00:00

Automatically sync local backups

This commit is contained in:
moelsayed
2019-01-28 19:17:44 +02:00
committed by Alena Prokharchyk
parent fa332f7e07
commit 285ac8d81c
4 changed files with 142 additions and 6 deletions

View File

@@ -3,9 +3,17 @@ package cluster
import (
"context"
"fmt"
"strings"
"github.com/rancher/rke/docker"
"github.com/rancher/rke/hosts"
"github.com/rancher/rke/log"
"github.com/rancher/rke/services"
"github.com/rancher/rke/util"
)
const (
// SupportedSyncToolsVersion is the version that isAutoSyncSupported compares
// an image tag against: tags that parse as a lower semantic version are
// reported as not supporting automatic local backup sync.
SupportedSyncToolsVersion = "0.1.22"
)
func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
@@ -17,20 +25,57 @@ func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
return nil
}
func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) error {
// get etcd snapshots from s3 if backup backend server is set
func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error {
// local backup case
var backupServer *hosts.Host
// stop etcd on all etcd nodes, we need this because we start the backup server on the same port
if !isAutoSyncSupported(c.SystemImages.Alpine) {
log.Warnf(ctx, "Auto local backup sync is not supported. Use `rancher/rke-tools:%s` or up", SupportedSyncToolsVersion)
} else if c.Services.Etcd.BackupConfig == nil || // legacy rke local backup
(c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig == nil) { // rancher local backup, no s3
for _, host := range c.EtcdHosts {
if err := docker.StopContainer(ctx, host.DClient, host.Address, services.EtcdContainerName); err != nil {
log.Warnf(ctx, "failed to stop etcd container on host [%s]: %v", host.Address, err)
}
if backupServer == nil { // start the download server, only one node should have it!
if err := services.StartBackupServer(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath); err != nil {
log.Warnf(ctx, "failed to start backup server on host [%s]: %v", host.Address, err)
continue
}
backupServer = host
}
}
// start downloading the snapshot
for _, host := range c.EtcdHosts {
if backupServer != nil && host.Address == backupServer.Address { // we skip the backup server if it's there
continue
}
if err := services.DownloadEtcdSnapshotFromBackupServer(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, backupServer); err != nil {
return err
}
}
// all good, let's remove the backup server container
if err := docker.DoRemoveContainer(ctx, backupServer.DClient, services.EtcdServeBackupContainerName, backupServer.Address); err != nil {
return err
}
}
// s3 backup case
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
for _, host := range c.EtcdHosts {
if err := services.DownloadEtcdSnapshot(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, c.Services.Etcd); err != nil {
if err := services.DownloadEtcdSnapshotFromS3(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, c.Services.Etcd); err != nil {
return err
}
}
}
// this applies to all cases!
if isEqual := c.etcdSnapshotChecksum(ctx, snapshotPath); !isEqual {
return fmt.Errorf("etcd snapshots are not consistent")
}
return nil
}
func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) error {
// Start restore process on all etcd hosts
initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
for _, host := range c.EtcdHosts {
@@ -60,3 +105,22 @@ func (c *Cluster) etcdSnapshotChecksum(ctx context.Context, snapshotPath string)
}
return true
}
// isAutoSyncSupported reports whether the tag of image parses as a semantic
// version that is not lower than SupportedSyncToolsVersion. It returns false
// when either version string cannot be parsed.
func isAutoSyncSupported(image string) bool {
	// Everything after the last ':' is treated as the tag; when image has no
	// ':' the whole string is used.
	tag := image[strings.LastIndex(image, ":")+1:]

	current, err := util.StrToSemVer(tag)
	if err != nil {
		return false
	}
	minimum, err := util.StrToSemVer(SupportedSyncToolsVersion)
	if err != nil {
		return false
	}
	return !current.LessThan(*minimum)
}

View File

@@ -130,7 +130,10 @@ func RestoreEtcdSnapshot(
if err := kubeCluster.TunnelHosts(ctx, flags); err != nil {
return err
}
// first download and check
if err := kubeCluster.PrepareBackup(ctx, snapshotName); err != nil {
return err
}
log.Infof(ctx, "Cleaning old kubernetes cluster")
if err := kubeCluster.CleanupNodes(ctx); err != nil {
return err

View File

@@ -335,7 +335,8 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
return nil
}
func DownloadEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, es v3.ETCDService) error {
func DownloadEtcdSnapshotFromS3(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, es v3.ETCDService) error {
log.Infof(ctx, "[etcd] Get snapshot [%s] on host [%s]", name, etcdHost.Address)
s3Backend := es.BackupConfig.S3BackupConfig
if len(s3Backend.Endpoint) == 0 || len(s3Backend.BucketName) == 0 {
@@ -347,6 +348,7 @@ func DownloadEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[
"etcd-backup",
"download",
"--name", name,
"--s3-backup=true",
"--s3-endpoint=" + s3Backend.Endpoint,
"--s3-accessKey=" + s3Backend.AccessKey,
"--s3-secretKey=" + s3Backend.SecretKey,
@@ -490,3 +492,69 @@ func configS3BackupImgCmd(ctx context.Context, imageCfg *container.Config, bc *v
imageCfg.Cmd = append(imageCfg.Cmd, cmd...)
return imageCfg
}
// StartBackupServer runs the EtcdServeBackupContainerName container on etcdHost
// from etcdSnapshotImage, invoking the rke-etcd-backup "serve" command for the
// snapshot called name with the CA certificate and the kube-node certificate
// and key. The container uses host networking, restarts on failure, and mounts
// EtcdSnapshotPath at /backup plus the host's /etc/kubernetes directory
// (under etcdHost.PrefixPath). It returns whatever docker.DoRunContainer
// returns.
func StartBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string) error {
	log.Infof(ctx, "[etcd] starting backup server on host [%s]", etcdHost.Address)

	serveCmd := []string{
		"/opt/rke-tools/rke-etcd-backup",
		"etcd-backup",
		"serve",
		"--name", name,
		"--cacert", pki.GetCertPath(pki.CACertName),
		"--cert", pki.GetCertPath(pki.KubeNodeCertName),
		"--key", pki.GetKeyPath(pki.KubeNodeCertName),
	}
	containerCfg := &container.Config{
		Image: etcdSnapshotImage,
		Cmd:   serveCmd,
	}

	kubeDir := path.Join(etcdHost.PrefixPath, "/etc/kubernetes")
	mounts := []string{
		fmt.Sprintf("%s:/backup", EtcdSnapshotPath),
		fmt.Sprintf("%s:/etc/kubernetes:z", kubeDir),
	}
	runCfg := &container.HostConfig{
		Binds:         mounts,
		NetworkMode:   container.NetworkMode("host"),
		RestartPolicy: container.RestartPolicy{Name: "on-failure"},
	}

	return docker.DoRunContainer(ctx, etcdHost.DClient, containerCfg, runCfg, EtcdServeBackupContainerName, etcdHost.Address, ETCDRole, prsMap)
}
// DownloadEtcdSnapshotFromBackupServer runs the EtcdDownloadBackupContainerName
// container on etcdHost from etcdSnapshotImage, invoking the rke-etcd-backup
// "download" command for the snapshot called name with backupServer.Address as
// the local endpoint. After the container has run, its exit status is checked
// and the container is removed.
//
// It returns an error if backupServer is nil, if the container cannot be run,
// if its output cannot be read, if it exits with a non-zero status, or if the
// container cannot be removed after a successful download.
func DownloadEtcdSnapshotFromBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage, name string, backupServer *hosts.Host) error {
	// Callers may end up without a backup server (e.g. it failed to start on
	// every host); report that instead of dereferencing a nil host.
	if backupServer == nil {
		return fmt.Errorf("no backup server available to download etcd snapshot [%s] on host [%s]", name, etcdHost.Address)
	}

	log.Infof(ctx, "[etcd] Get snapshot [%s] on host [%s]", name, etcdHost.Address)
	imageCfg := &container.Config{
		Cmd: []string{
			"/opt/rke-tools/rke-etcd-backup",
			"etcd-backup",
			"download",
			"--name", name,
			"--local-endpoint", backupServer.Address,
			"--cacert", pki.GetCertPath(pki.CACertName),
			"--cert", pki.GetCertPath(pki.KubeNodeCertName),
			"--key", pki.GetKeyPath(pki.KubeNodeCertName),
		},
		Image: etcdSnapshotImage,
	}

	hostCfg := &container.HostConfig{
		Binds: []string{
			fmt.Sprintf("%s:/backup", EtcdSnapshotPath),
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes")),
		},
		NetworkMode:   container.NetworkMode("host"),
		RestartPolicy: container.RestartPolicy{Name: "on-failure"},
	}
	if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdDownloadBackupContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
		return err
	}

	status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address)
	if status != 0 || err != nil {
		// Removal here is best-effort: the download failure is what gets
		// reported, a removal failure is only logged.
		if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
			log.Warnf(ctx, "Failed to remove container [%s]: %v", EtcdDownloadBackupContainerName, removeErr)
		}
		if err != nil {
			return err
		}
		return fmt.Errorf("Failed to download etcd snapshot from backup server [%s], exit code [%d]: %v", backupServer.Address, status, stderr)
	}
	return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName)
}

View File

@@ -32,6 +32,7 @@ const (
EtcdSnapshotOnceContainerName = "etcd-snapshot-once"
EtcdRestoreContainerName = "etcd-restore"
EtcdDownloadBackupContainerName = "etcd-download-backup"
EtcdServeBackupContainerName = "etcd-Serve-backup"
EtcdChecksumContainerName = "etcd-checksum-checker"
NginxProxyContainerName = "nginx-proxy"
SidekickContainerName = "service-sidekick"