1
0
mirror of https://github.com/rancher/rke.git synced 2025-05-11 01:45:00 +00:00

add etcd s3 uploading and downloading snapshot feature

This commit is contained in:
Guangbo Chen 2018-12-13 16:46:47 +08:00 committed by Alena Prokharchyk
parent 9db25ef841
commit 9cfe5661d8
8 changed files with 204 additions and 54 deletions

View File

@ -90,12 +90,7 @@ func (c *Cluster) DeployControlPlane(ctx context.Context) error {
if len(c.Services.Etcd.ExternalURLs) > 0 {
log.Infof(ctx, "[etcd] External etcd connection string has been specified, skipping etcd plane")
} else {
etcdRollingSnapshot := services.EtcdSnapshot{
Snapshot: c.Services.Etcd.Snapshot,
Creation: c.Services.Etcd.Creation,
Retention: c.Services.Etcd.Retention,
}
if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine, etcdRollingSnapshot); err != nil {
if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine, c.Services.Etcd); err != nil {
return fmt.Errorf("[etcd] Failed to bring up Etcd Plane: %v", err)
}
}

View File

@ -37,11 +37,13 @@ const (
DefaultNetworkPlugin = "canal"
DefaultNetworkCloudProvider = "none"
DefaultIngressController = "nginx"
DefaultEtcdBackupCreationPeriod = "12h"
DefaultEtcdBackupRetentionPeriod = "72h"
DefaultEtcdSnapshot = true
DefaultMonitoringProvider = "metrics-server"
DefaultIngressController = "nginx"
DefaultEtcdBackupCreationPeriod = "12h"
DefaultEtcdBackupRetentionPeriod = "72h"
DefaultEtcdSnapshot = true
DefaultMonitoringProvider = "metrics-server"
DefaultEtcdBackupConfigIntervalHours = 12
DefaultEtcdBackupConfigRetention = 6
DefaultEtcdHeartbeatIntervalName = "heartbeat-interval"
DefaultEtcdHeartbeatIntervalValue = "500"
@ -183,6 +185,15 @@ func (c *Cluster) setClusterServicesDefaults() {
if _, ok := c.Services.Etcd.ExtraArgs[DefaultEtcdHeartbeatIntervalName]; !ok {
c.Services.Etcd.ExtraArgs[DefaultEtcdHeartbeatIntervalName] = DefaultEtcdHeartbeatIntervalValue
}
if c.Services.Etcd.BackupConfig != nil {
if c.Services.Etcd.BackupConfig.IntervalHours == 0 {
c.Services.Etcd.BackupConfig.IntervalHours = DefaultEtcdBackupConfigIntervalHours
}
if c.Services.Etcd.BackupConfig.Retention == 0 {
c.Services.Etcd.BackupConfig.Retention = DefaultEtcdBackupConfigRetention
}
}
}
func (c *Cluster) setClusterImageDefaults() {

View File

@ -10,7 +10,7 @@ import (
func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
for _, host := range c.EtcdHosts {
if err := services.RunEtcdSnapshotSave(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, c.Services.Etcd.Creation, c.Services.Etcd.Retention, snapshotName, true); err != nil {
if err := services.RunEtcdSnapshotSave(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotName, true, c.Services.Etcd); err != nil {
return err
}
}
@ -18,9 +18,19 @@ func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
}
func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) error {
// get etcd snapshots from s3 if backup backend server is set
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
for _, host := range c.EtcdHosts {
if err := services.DownloadEtcdSnapshot(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, snapshotPath, c.Services.Etcd); err != nil {
return err
}
}
}
if isEqual := c.etcdSnapshotChecksum(ctx, snapshotPath); !isEqual {
return fmt.Errorf("etcd snapshots are not consistent")
}
// Start restore process on all etcd hosts
initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
for _, host := range c.EtcdHosts {

View File

@ -119,6 +119,27 @@ func validateServicesOptions(c *Cluster) error {
return fmt.Errorf("External etcd path can't be empty")
}
}
// validate etcd s3 backup backend configurations
if err := validateEtcdBackupOptions(c); err != nil {
return err
}
return nil
}
// validateEtcdBackupOptions checks the etcd backup section of the cluster
// configuration.
//
// A cluster without a backup config is always valid. Once a backup config is
// present, an S3 backend must be set and must carry a non-empty endpoint and
// bucket name; the first missing piece is reported as an error.
func validateEtcdBackupOptions(c *Cluster) error {
	backup := c.Services.Etcd.BackupConfig
	if backup == nil {
		// Backups are not configured, nothing to validate.
		return nil
	}

	s3 := backup.S3BackupConfig
	switch {
	case s3 == nil:
		return fmt.Errorf("etcd backup is enabled but no s3 backend is specified")
	case s3.Endpoint == "":
		return fmt.Errorf("etcd s3 backup backend endpoint can't be empty")
	case s3.BucketName == "":
		return fmt.Errorf("etcd s3 backup backend bucketName can't be empty")
	}
	return nil
}

View File

@ -3,14 +3,15 @@ package cmd
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/rancher/rke/cluster"
"github.com/rancher/rke/hosts"
"github.com/rancher/rke/log"
"github.com/rancher/types/apis/management.cattle.io/v3"
"github.com/urfave/cli"
"io/ioutil"
"os"
"path/filepath"
)
var commonFlags = []cli.Flag{
@ -32,7 +33,7 @@ func resolveClusterFile(ctx *cli.Context) (string, string, error) {
}
file, err := os.Open(fp)
if err != nil {
return "", "", fmt.Errorf("Can not find cluster configuration file: %v", err)
return "", "", fmt.Errorf("can not find cluster configuration file: %v", err)
}
defer file.Close()
buf, err := ioutil.ReadAll(file)
@ -53,6 +54,12 @@ func setOptionsFromCLI(c *cli.Context, rkeConfig *v3.RancherKubernetesEngineConf
rkeConfig.IgnoreDockerVersion = c.Bool("ignore-docker-version")
}
if c.Bool("s3") {
if rkeConfig.Services.Etcd.BackupConfig == nil {
rkeConfig.Services.Etcd.BackupConfig = &v3.BackupConfig{}
}
rkeConfig.Services.Etcd.BackupConfig.S3BackupConfig = setS3OptionsFromCLI(c)
}
return rkeConfig, nil
}
@ -91,3 +98,28 @@ func ClusterInit(ctx context.Context, rkeConfig *v3.RancherKubernetesEngineConfi
}
return rkeState.WriteStateFile(ctx, stateFilePath)
}
// setS3OptionsFromCLI builds an S3 backup configuration from the command-line
// flags "s3-endpoint", "bucket-name", "region", "access-key" and "secret-key".
//
// A flag that resolves to the empty string leaves the corresponding field at
// its zero value, so the returned config is never nil but may be partially
// (or entirely) empty.
func setS3OptionsFromCLI(c *cli.Context) *v3.S3BackupConfig {
	return &v3.S3BackupConfig{
		Endpoint:   c.String("s3-endpoint"),
		BucketName: c.String("bucket-name"),
		Region:     c.String("region"),
		AccessKey:  c.String("access-key"),
		SecretKey:  c.String("secret-key"),
	}
}

View File

@ -14,11 +14,13 @@ import (
"github.com/urfave/cli"
)
const s3Endpoint = "s3.amazonaws.com"
func EtcdCommand() cli.Command {
snapshotFlags := []cli.Flag{
cli.StringFlag{
Name: "name",
Usage: "Specify Snapshot name",
Usage: "Specify snapshot name",
},
cli.StringFlag{
Name: "config",
@ -26,8 +28,32 @@ func EtcdCommand() cli.Command {
Value: pki.ClusterConfig,
EnvVar: "RKE_CONFIG",
},
cli.BoolFlag{
Name: "s3",
Usage: "Enabled backup to s3, set true or false",
},
cli.StringFlag{
Name: "s3-endpoint",
Usage: "Specify s3 endpoint url",
Value: s3Endpoint,
},
cli.StringFlag{
Name: "access-key",
Usage: "Specify s3 accessKey",
},
cli.StringFlag{
Name: "secret-key",
Usage: "Specify s3 secretKey",
},
cli.StringFlag{
Name: "bucket-name",
Usage: "Specify s3 bucket name",
},
cli.StringFlag{
Name: "region",
Usage: "Specify the s3 bucket location (optional)",
},
}
snapshotFlags = append(snapshotFlags, commonFlags...)
return cli.Command{
@ -132,12 +158,12 @@ func RestoreEtcdSnapshot(
func SnapshotSaveEtcdHostsFromCli(ctx *cli.Context) error {
clusterFile, filePath, err := resolveClusterFile(ctx)
if err != nil {
return fmt.Errorf("Failed to resolve cluster file: %v", err)
return fmt.Errorf("failed to resolve cluster file: %v", err)
}
rkeConfig, err := cluster.ParseConfig(clusterFile)
if err != nil {
return fmt.Errorf("Failed to parse cluster file: %v", err)
return fmt.Errorf("failed to parse cluster file: %v", err)
}
rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig)
@ -159,12 +185,12 @@ func SnapshotSaveEtcdHostsFromCli(ctx *cli.Context) error {
func RestoreEtcdSnapshotFromCli(ctx *cli.Context) error {
clusterFile, filePath, err := resolveClusterFile(ctx)
if err != nil {
return fmt.Errorf("Failed to resolve cluster file: %v", err)
return fmt.Errorf("failed to resolve cluster file: %v", err)
}
rkeConfig, err := cluster.ParseConfig(clusterFile)
if err != nil {
return fmt.Errorf("Failed to parse cluster file: %v", err)
return fmt.Errorf("failed to parse cluster file: %v", err)
}
rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig)
@ -173,7 +199,7 @@ func RestoreEtcdSnapshotFromCli(ctx *cli.Context) error {
}
etcdSnapshotName := ctx.String("name")
if etcdSnapshotName == "" {
return fmt.Errorf("You must specify the snapshot name to restore")
return fmt.Errorf("you must specify the snapshot name to restore")
}
// setting up the flags
flags := cluster.GetExternalFlags(false, false, false, "", filePath)

View File

@ -29,15 +29,6 @@ const (
EtcdSnapshotWaitTime = 5
)
type EtcdSnapshot struct {
// Enable or disable snapshot creation
Snapshot *bool
// Creation period of the etcd snapshots
Creation string
// Retention period of the etcd snapshots
Retention string
}
func RunEtcdPlane(
ctx context.Context,
etcdHosts []*hosts.Host,
@ -46,7 +37,7 @@ func RunEtcdPlane(
prsMap map[string]v3.PrivateRegistry,
updateWorkersOnly bool,
alpineImage string,
etcdSnapshot EtcdSnapshot) error {
es v3.ETCDService) error {
log.Infof(ctx, "[%s] Building up etcd plane..", ETCDRole)
for _, host := range etcdHosts {
if updateWorkersOnly {
@ -57,8 +48,8 @@ func RunEtcdPlane(
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, EtcdContainerName, host.Address, ETCDRole, prsMap); err != nil {
return err
}
if *etcdSnapshot.Snapshot == true {
if err := RunEtcdSnapshotSave(ctx, host, prsMap, alpineImage, etcdSnapshot.Creation, etcdSnapshot.Retention, EtcdSnapshotContainerName, false); err != nil {
if *es.Snapshot == true {
if err := RunEtcdSnapshotSave(ctx, host, prsMap, alpineImage, EtcdSnapshotContainerName, false, es); err != nil {
return err
}
if err := pki.SaveBackupBundleOnHost(ctx, host, alpineImage, EtcdSnapshotPath, prsMap); err != nil {
@ -262,12 +253,13 @@ func IsEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*hosts.
return false, nil
}
func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, creation, retention, name string, once bool) error {
func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, once bool, es v3.ETCDService) error {
log.Infof(ctx, "[etcd] Saving snapshot [%s] on host [%s]", name, etcdHost.Address)
imageCfg := &container.Config{
Cmd: []string{
"/opt/rke-tools/rke-etcd-backup",
"rolling-backup",
"etcd-backup",
"save",
"--cacert", pki.GetCertPath(pki.CACertName),
"--cert", pki.GetCertPath(pki.KubeNodeCertName),
"--key", pki.GetKeyPath(pki.KubeNodeCertName),
@ -278,10 +270,13 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
}
if once {
imageCfg.Cmd = append(imageCfg.Cmd, "--once")
} else if es.BackupConfig == nil {
imageCfg.Cmd = append(imageCfg.Cmd, "--retention="+es.Retention)
imageCfg.Cmd = append(imageCfg.Cmd, "--creation="+es.Creation)
}
if !once {
imageCfg.Cmd = append(imageCfg.Cmd, "--retention="+retention)
imageCfg.Cmd = append(imageCfg.Cmd, "--creation="+creation)
if es.BackupConfig != nil && es.BackupConfig.S3BackupConfig != nil {
imageCfg = configS3BackupImgCmd(ctx, imageCfg, es.BackupConfig)
}
hostCfg := &container.HostConfig{
Binds: []string{
@ -297,6 +292,10 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
}
status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName)
if status != 0 || err != nil {
err := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName)
if err != nil {
return fmt.Errorf("Failed to take etcd snapshot exit code [%d], failed to exit container [%s]: %v ", status, EtcdDownloadBackupContainerName, err)
}
return fmt.Errorf("Failed to take etcd snapshot exit code [%d]: %v", status, err)
}
return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName)
@ -318,6 +317,45 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
return nil
}
// DownloadEtcdSnapshot runs a one-off container on etcdHost that invokes
// "rke-etcd-backup etcd-backup download" to fetch the snapshot called name
// from the S3 backend described by es.BackupConfig.
//
// The container is started from etcdSnapshotImage (pulled through prsMap when
// needed), mounts EtcdSnapshotPath at /backup and the host's /etc/kubernetes
// directory, and is removed again once it has exited.
//
// An error is returned when the S3 configuration is missing or incomplete,
// when the container cannot be started, when it exits with a non-zero status,
// or when it cannot be removed afterwards.
func DownloadEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, es v3.ETCDService) error {
	log.Infof(ctx, "[etcd] Get snapshot [%s] on host [%s]", name, etcdHost.Address)

	// Guard against a missing backup section before dereferencing it; callers
	// outside this package may not have validated the configuration.
	if es.BackupConfig == nil || es.BackupConfig.S3BackupConfig == nil {
		return fmt.Errorf("failed to get snapshot [%s] from s3 on host [%s], invalid s3 configurations", name, etcdHost.Address)
	}
	s3Backend := es.BackupConfig.S3BackupConfig
	if len(s3Backend.Endpoint) == 0 || len(s3Backend.BucketName) == 0 {
		return fmt.Errorf("failed to get snapshot [%s] from s3 on host [%s], invalid s3 configurations", name, etcdHost.Address)
	}

	imageCfg := &container.Config{
		Cmd: []string{
			"/opt/rke-tools/rke-etcd-backup",
			"etcd-backup",
			"download",
			"--name", name,
		},
		Image: etcdSnapshotImage,
	}
	// Append the S3 connection flags to the download command.
	imageCfg = configS3BackupImgCmd(ctx, imageCfg, es.BackupConfig)

	hostCfg := &container.HostConfig{
		Binds: []string{
			fmt.Sprintf("%s:/backup", EtcdSnapshotPath),
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
		NetworkMode: container.NetworkMode("host"),
		// NOTE(review): "always" looks unintended for a run-once download
		// container that is waited on and removed below — confirm whether
		// "no" is the desired restart policy.
		RestartPolicy: container.RestartPolicy{Name: "always"},
	}
	if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdDownloadBackupContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
		return err
	}

	status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName)
	if status != 0 || err != nil {
		// Clean up the failed container. The removal error gets its own
		// variable so the wait error is still available for the message below.
		if rmErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); rmErr != nil {
			return fmt.Errorf("Failed to get etcd snapshot from s3 exit code [%d], failed to exit container [%s]: %v ", status, EtcdDownloadBackupContainerName, rmErr)
		}
		return fmt.Errorf("Failed to get etcd snapshot from s3 exit code [%d]: %v", status, err)
	}
	return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName)
}
func RestoreEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdRestoreImage, snapshotName, initCluster string) error {
log.Infof(ctx, "[etcd] Restoring [%s] snapshot on etcd host [%s]", snapshotName, etcdHost.Address)
nodeName := pki.GetEtcdCrtName(etcdHost.InternalAddress)
@ -407,3 +445,19 @@ func GetEtcdSnapshotChecksum(ctx context.Context, etcdHost *hosts.Host, prsMap m
}
return checksum, nil
}
// configS3BackupImgCmd appends the S3-related flags of the backup tool to
// imageCfg.Cmd and returns the same (mutated) imageCfg.
//
// The flags carry the endpoint, credentials, bucket and region from
// bc.S3BackupConfig, a creation interval of bc.IntervalHours hours, and a
// retention window of bc.Retention*bc.IntervalHours hours. bc and
// bc.S3BackupConfig must be non-nil.
func configS3BackupImgCmd(ctx context.Context, imageCfg *container.Config, bc *v3.BackupConfig) *container.Config {
	s3 := bc.S3BackupConfig
	log.Infof(ctx, "Invoking s3 backup server cmd config, bucketName:%s, endpoint:%s", s3.BucketName, s3.Endpoint)

	// Retention is expressed as a number of intervals, so convert it to hours.
	retentionHours := bc.Retention * bc.IntervalHours

	imageCfg.Cmd = append(imageCfg.Cmd,
		"--s3-backup=true",
		"--s3-endpoint="+s3.Endpoint,
		"--s3-accessKey="+s3.AccessKey,
		"--s3-secretKey="+s3.SecretKey,
		"--s3-bucketName="+s3.BucketName,
		"--s3-region="+s3.Region,
		fmt.Sprintf("--creation=%dh", bc.IntervalHours),
		fmt.Sprintf("--retention=%dh", retentionHours),
	)
	return imageCfg
}

View File

@ -22,20 +22,21 @@ const (
SidekickServiceName = "sidekick"
RBACAuthorizationMode = "rbac"
KubeAPIContainerName = "kube-apiserver"
KubeletContainerName = "kubelet"
KubeproxyContainerName = "kube-proxy"
KubeControllerContainerName = "kube-controller-manager"
SchedulerContainerName = "kube-scheduler"
EtcdContainerName = "etcd"
EtcdSnapshotContainerName = "etcd-rolling-snapshots"
EtcdSnapshotOnceContainerName = "etcd-snapshot-once"
EtcdRestoreContainerName = "etcd-restore"
EtcdChecksumContainerName = "etcd-checksum-checker"
NginxProxyContainerName = "nginx-proxy"
SidekickContainerName = "service-sidekick"
LogLinkContainerName = "rke-log-linker"
LogCleanerContainerName = "rke-log-cleaner"
KubeAPIContainerName = "kube-apiserver"
KubeletContainerName = "kubelet"
KubeproxyContainerName = "kube-proxy"
KubeControllerContainerName = "kube-controller-manager"
SchedulerContainerName = "kube-scheduler"
EtcdContainerName = "etcd"
EtcdSnapshotContainerName = "etcd-rolling-snapshots"
EtcdSnapshotOnceContainerName = "etcd-snapshot-once"
EtcdRestoreContainerName = "etcd-restore"
EtcdDownloadBackupContainerName = "etcd-download-backup"
EtcdChecksumContainerName = "etcd-checksum-checker"
NginxProxyContainerName = "nginx-proxy"
SidekickContainerName = "service-sidekick"
LogLinkContainerName = "rke-log-linker"
LogCleanerContainerName = "rke-log-cleaner"
KubeAPIPort = 6443
SchedulerPort = 10251