Mirror of https://github.com/rancher/rke.git, synced 2025-10-22 11:53:06 +00:00
Automatically sync local backups
committed by Alena Prokharchyk
parent fa332f7e07
commit 285ac8d81c
@@ -3,9 +3,17 @@ package cluster
 import (
 	"context"
 	"fmt"
+	"strings"
 
+	"github.com/rancher/rke/docker"
+	"github.com/rancher/rke/hosts"
 	"github.com/rancher/rke/log"
 	"github.com/rancher/rke/services"
+	"github.com/rancher/rke/util"
 )
 
+const (
+	SupportedSyncToolsVersion = "0.1.22"
+)
+
 func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
@@ -17,20 +25,57 @@ func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
 	return nil
 }
 
-func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) error {
-	// get etcd snapshots from s3 if backup backend server is set
+func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error {
+	// local backup case
+	var backupServer *hosts.Host
+	// stop etcd on all etcd nodes, we need this because we start the backup server on the same port
+	if !isAutoSyncSupported(c.SystemImages.Alpine) {
+		log.Warnf(ctx, "Auto local backup sync is not supported. Use `rancher/rke-tools:%s` or newer", SupportedSyncToolsVersion)
+	} else if c.Services.Etcd.BackupConfig == nil || // legacy rke local backup
+		(c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig == nil) { // rancher local backup, no s3
+		for _, host := range c.EtcdHosts {
+			if err := docker.StopContainer(ctx, host.DClient, host.Address, services.EtcdContainerName); err != nil {
+				log.Warnf(ctx, "failed to stop etcd container on host [%s]: %v", host.Address, err)
+			}
+			if backupServer == nil { // start the download server, only one node should have it!
+				if err := services.StartBackupServer(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath); err != nil {
+					log.Warnf(ctx, "failed to start backup server on host [%s]: %v", host.Address, err)
+					continue
+				}
+				backupServer = host
+			}
+		}
+		// start downloading the snapshot
+		for _, host := range c.EtcdHosts {
+			if backupServer != nil && host.Address == backupServer.Address { // we skip the backup server if it's there
+				continue
+			}
+			if err := services.DownloadEtcdSnapshotFromBackupServer(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, backupServer); err != nil {
+				return err
+			}
+		}
+		// all good, let's remove the backup server container
+		if err := docker.DoRemoveContainer(ctx, backupServer.DClient, services.EtcdServeBackupContainerName, backupServer.Address); err != nil {
+			return err
+		}
+	}
+
+	// s3 backup case
 	if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
 		for _, host := range c.EtcdHosts {
-			if err := services.DownloadEtcdSnapshot(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, c.Services.Etcd); err != nil {
+			if err := services.DownloadEtcdSnapshotFromS3(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, c.Services.Etcd); err != nil {
 				return err
 			}
 		}
 	}
 
+	// this applies to all cases!
 	if isEqual := c.etcdSnapshotChecksum(ctx, snapshotPath); !isEqual {
 		return fmt.Errorf("etcd snapshots are not consistent")
 	}
+	return nil
+}
+
+func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) error {
 	// Start restore process on all etcd hosts
 	initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
 	for _, host := range c.EtcdHosts {
@@ -60,3 +105,22 @@ func (c *Cluster) etcdSnapshotChecksum(ctx context.Context, snapshotPath string)
 	}
 	return true
 }
+
+func isAutoSyncSupported(image string) bool {
+	v := strings.Split(image, ":")
+	last := v[len(v)-1]
+
+	sv, err := util.StrToSemVer(last)
+	if err != nil {
+		return false
+	}
+
+	supported, err := util.StrToSemVer(SupportedSyncToolsVersion)
+	if err != nil {
+		return false
+	}
+	if sv.LessThan(*supported) {
+		return false
+	}
+	return true
+}
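isAutoSyncSupported only looks at the tag portion of the rke-tools image and compares it against SupportedSyncToolsVersion. Below is a minimal table-test sketch of that behaviour, assuming it sits next to the function in the cluster package; the 0.1.21 tag and the untagged image are illustrative inputs, not values taken from this commit.

package cluster

import "testing"

// Sketch only, not part of this commit. The 0.1.21 tag and the untagged image
// are arbitrary example inputs used to exercise the semver gate above.
func TestIsAutoSyncSupported(t *testing.T) {
	cases := map[string]bool{
		"rancher/rke-tools:0.1.22": true,  // equal to SupportedSyncToolsVersion
		"rancher/rke-tools:0.1.21": false, // older than SupportedSyncToolsVersion
		"rancher/rke-tools":        false, // no tag, so the semver parse is expected to fail
	}
	for image, want := range cases {
		if got := isAutoSyncSupported(image); got != want {
			t.Errorf("isAutoSyncSupported(%q) = %v, want %v", image, got, want)
		}
	}
}

An image whose tag does not parse as semver fails the check, so PrepareBackup logs the warning and skips the local sync path.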
@@ -130,7 +130,10 @@ func RestoreEtcdSnapshot(
 	if err := kubeCluster.TunnelHosts(ctx, flags); err != nil {
 		return err
 	}
-
+	// first download and check
+	if err := kubeCluster.PrepareBackup(ctx, snapshotName); err != nil {
+		return err
+	}
 	log.Infof(ctx, "Cleaning old kubernetes cluster")
 	if err := kubeCluster.CleanupNodes(ctx); err != nil {
 		return err
@@ -335,7 +335,8 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
 	return nil
 }
 
-func DownloadEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, es v3.ETCDService) error {
+func DownloadEtcdSnapshotFromS3(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, es v3.ETCDService) error {
+
 	log.Infof(ctx, "[etcd] Get snapshot [%s] on host [%s]", name, etcdHost.Address)
 	s3Backend := es.BackupConfig.S3BackupConfig
 	if len(s3Backend.Endpoint) == 0 || len(s3Backend.BucketName) == 0 {
@@ -347,6 +348,7 @@ func DownloadEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[
 		"etcd-backup",
 		"download",
 		"--name", name,
+		"--s3-backup=true",
 		"--s3-endpoint=" + s3Backend.Endpoint,
 		"--s3-accessKey=" + s3Backend.AccessKey,
 		"--s3-secretKey=" + s3Backend.SecretKey,
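DownloadEtcdSnapshotFromS3 dereferences es.BackupConfig.S3BackupConfig before validating the endpoint and bucket, so callers are expected to check for an S3 backup config first, as PrepareBackup does above. The helper below is hypothetical, not part of this commit; it assumes it lives in the services package alongside the function and simply makes that precondition explicit.

// downloadFromS3IfConfigured is an illustrative helper, not part of this commit:
// it only forwards to DownloadEtcdSnapshotFromS3 when an S3 backup config exists,
// since that function dereferences es.BackupConfig.S3BackupConfig unconditionally.
func downloadFromS3IfConfigured(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage, name string, es v3.ETCDService) error {
	if es.BackupConfig == nil || es.BackupConfig.S3BackupConfig == nil {
		return nil // local-only backup, nothing to download from S3
	}
	return DownloadEtcdSnapshotFromS3(ctx, etcdHost, prsMap, etcdSnapshotImage, name, es)
}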
@@ -490,3 +492,69 @@ func configS3BackupImgCmd(ctx context.Context, imageCfg *container.Config, bc *v
 	imageCfg.Cmd = append(imageCfg.Cmd, cmd...)
 	return imageCfg
 }
+
+func StartBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string) error {
+	log.Infof(ctx, "[etcd] starting backup server on host [%s]", etcdHost.Address)
+
+	imageCfg := &container.Config{
+		Cmd: []string{
+			"/opt/rke-tools/rke-etcd-backup",
+			"etcd-backup",
+			"serve",
+			"--name", name,
+			"--cacert", pki.GetCertPath(pki.CACertName),
+			"--cert", pki.GetCertPath(pki.KubeNodeCertName),
+			"--key", pki.GetKeyPath(pki.KubeNodeCertName),
+		},
+		Image: etcdSnapshotImage,
+	}
+	hostCfg := &container.HostConfig{
+		Binds: []string{
+			fmt.Sprintf("%s:/backup", EtcdSnapshotPath),
+			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
+		NetworkMode:   container.NetworkMode("host"),
+		RestartPolicy: container.RestartPolicy{Name: "on-failure"},
+	}
+	return docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdServeBackupContainerName, etcdHost.Address, ETCDRole, prsMap)
+}
+
+func DownloadEtcdSnapshotFromBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage, name string, backupServer *hosts.Host) error {
+	log.Infof(ctx, "[etcd] Get snapshot [%s] on host [%s]", name, etcdHost.Address)
+	imageCfg := &container.Config{
+		Cmd: []string{
+			"/opt/rke-tools/rke-etcd-backup",
+			"etcd-backup",
+			"download",
+			"--name", name,
+			"--local-endpoint", backupServer.Address,
+			"--cacert", pki.GetCertPath(pki.CACertName),
+			"--cert", pki.GetCertPath(pki.KubeNodeCertName),
+			"--key", pki.GetKeyPath(pki.KubeNodeCertName),
+		},
+		Image: etcdSnapshotImage,
+	}
+
+	hostCfg := &container.HostConfig{
+		Binds: []string{
+			fmt.Sprintf("%s:/backup", EtcdSnapshotPath),
+			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
+		NetworkMode:   container.NetworkMode("host"),
+		RestartPolicy: container.RestartPolicy{Name: "on-failure"},
+	}
+
+	if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdDownloadBackupContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
+		return err
+	}
+
+	status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address)
+	if status != 0 || err != nil {
+		if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
+			log.Warnf(ctx, "Failed to remove container [%s]: %v", EtcdDownloadBackupContainerName, removeErr)
+		}
+		if err != nil {
+			return err
+		}
+		return fmt.Errorf("Failed to download etcd snapshot from backup server [%s], exit code [%d]: %v", backupServer.Address, status, stderr)
+	}
+	return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName)
+}
@@ -32,6 +32,7 @@ const (
 	EtcdSnapshotOnceContainerName   = "etcd-snapshot-once"
 	EtcdRestoreContainerName        = "etcd-restore"
 	EtcdDownloadBackupContainerName = "etcd-download-backup"
+	EtcdServeBackupContainerName    = "etcd-Serve-backup"
 	EtcdChecksumContainerName       = "etcd-checksum-checker"
 	NginxProxyContainerName         = "nginx-proxy"
 	SidekickContainerName           = "service-sidekick"