1
0
mirror of https://github.com/rancher/rke.git synced 2025-07-05 19:47:47 +00:00

Etcd Backup/restore

This commit is contained in:
galal-hussein 2018-05-09 19:39:19 +02:00
parent 5d718ad123
commit a3416e6b92
13 changed files with 479 additions and 15 deletions

117
README.md
View File

@ -363,6 +363,7 @@ nodes:
```
## Deploying Rancher 2.0 using rke
Using RKE's pluggable user addons, it's possible to deploy Rancher 2.0 server in HA with a single command.
Depending how you want to manage your ssl certificates, there are 2 deployment options:
@ -416,6 +417,122 @@ kubectl -n cattle-system scale deployment cattle --replicas=3
# chown <user> /var/run/docker.sock
```
## Etcd Backup and Restoration
You can configure a Rancher Kubernetes Engine (RKE) cluster to automatically create backups of etcd. In a disaster scenario, you can restore these backups, which are stored on other cluster nodes.
### Etcd Regular Backup
To schedule a recurring automatic etcd backup, enable the `etcd-backup` service. `etcd-backup` runs in a service container alongside the `etcd` container. `etcd-backup` automatically creates backups and stores them to its local disk.
To enable `etcd-backup` in RKE CLI, configure the following three variables:
```
services:
etcd:
backup: true
creation: 5m0s
retention: 24h
```
- `backup`: Enables/disables etcd backups in the RKE cluster.
Default value: `false`.
- `creation`: Time period in which `etcd-backup` creates and stores local backups.
Default value: `5m0s`
- `retention`: Time period before an etcd backup expires. Expired backups are purged.
Default value: `24h`
After RKE runs, view the `etcd-backup` logs to confirm backups are being created automatically:
```
# docker logs etcd-backup
time="2018-05-04T18:39:16Z" level=info msg="Initializing Rolling Backups" creation=1m0s retention=24h0m0s
time="2018-05-04T18:40:16Z" level=info msg="Created backup" name="2018-05-04T18:40:16Z_etcd" runtime=108.332814ms
time="2018-05-04T18:41:16Z" level=info msg="Created backup" name="2018-05-04T18:41:16Z_etcd" runtime=92.880112ms
time="2018-05-04T18:42:16Z" level=info msg="Created backup" name="2018-05-04T18:42:16Z_etcd" runtime=83.67642ms
time="2018-05-04T18:43:16Z" level=info msg="Created backup" name="2018-05-04T18:43:16Z_etcd" runtime=86.298499ms
```
Backups are saved to the following directory: `/opt/rke/etcdbackup/`. Backups are created on each node that runs etcd.
### Etcd onetime Snapshots
RKE also adds two commands for etcd backup management:
```
./rke etcd backup [NAME]
```
and
```
./rke etcd restore [NAME]
```
The backup command saves a snapshot of etcd in `/opt/rke/etcdbackup`. This command also creates a container for the backup. When the backup completes, the container is removed.
```
# ./rke etcd backup --name snapshot
INFO[0000] Starting Backup on etcd hosts
INFO[0000] [dialer] Setup tunnel for host [x.x.x.x]
INFO[0002] [dialer] Setup tunnel for host [y.y.y.y]
INFO[0004] [dialer] Setup tunnel for host [z.z.z.z]
INFO[0006] [etcd] Starting backup on host [x.x.x.x]
INFO[0007] [etcd] Successfully started [etcd-backup-once] container on host [x.x.x.x]
INFO[0007] [etcd] Starting backup on host [y.y.y.y]
INFO[0009] [etcd] Successfully started [etcd-backup-once] container on host [y.y.y.y]
INFO[0010] [etcd] Starting backup on host [z.z.z.z]
INFO[0011] [etcd] Successfully started [etcd-backup-once] container on host [z.z.z.z]
INFO[0011] Finished backup on all etcd hosts
```
### Etcd Disaster recovery
`etcd restore` is used for etcd disaster recovery; it reverts to any snapshot stored in `/opt/rke/etcdbackup` that you explicitly define. When you run `etcd restore`, RKE removes the old etcd container if it still exists. To restore operations, RKE creates a new etcd cluster using the snapshot you choose.
>**Warning:** Restoring an etcd backup deletes your current etcd cluster and replaces it with a new one. Before you run the `etcd restore` command, backup any important data in your current cluster.
```
./rke etcd restore --name snapshot --config test-aws.yml
INFO[0000] Starting restore on etcd hosts
INFO[0000] [dialer] Setup tunnel for host [x.x.x.x]
INFO[0002] [dialer] Setup tunnel for host [y.y.y.y]
INFO[0005] [dialer] Setup tunnel for host [z.z.z.z]
INFO[0007] [hosts] Cleaning up host [x.x.x.x]
INFO[0007] [hosts] Running cleaner container on host [x.x.x.x]
INFO[0008] [kube-cleaner] Successfully started [kube-cleaner] container on host [x.x.x.x]
INFO[0008] [hosts] Removing cleaner container on host [x.x.x.x]
INFO[0008] [hosts] Successfully cleaned up host [x.x.x.x]
INFO[0009] [hosts] Cleaning up host [y.y.y.y]
INFO[0009] [hosts] Running cleaner container on host [y.y.y.y]
INFO[0010] [kube-cleaner] Successfully started [kube-cleaner] container on host [y.y.y.y]
INFO[0010] [hosts] Removing cleaner container on host [y.y.y.y]
INFO[0010] [hosts] Successfully cleaned up host [y.y.y.y]
INFO[0011] [hosts] Cleaning up host [z.z.z.z]
INFO[0011] [hosts] Running cleaner container on host [z.z.z.z]
INFO[0012] [kube-cleaner] Successfully started [kube-cleaner] container on host [z.z.z.z]
INFO[0012] [hosts] Removing cleaner container on host [z.z.z.z]
INFO[0012] [hosts] Successfully cleaned up host [z.z.z.z]
INFO[0012] [etcd] Restoring [snapshot] snapshot on etcd host [x.x.x.x]
INFO[0013] [etcd] Successfully started [etcd-restore] container on host [x.x.x.x]
INFO[0014] [etcd] Restoring [snapshot] snapshot on etcd host [y.y.y.y]
INFO[0015] [etcd] Successfully started [etcd-restore] container on host [y.y.y.y]
INFO[0015] [etcd] Restoring [snapshot] snapshot on etcd host [z.z.z.z]
INFO[0016] [etcd] Successfully started [etcd-restore] container on host [z.z.z.z]
INFO[0017] [etcd] Building up etcd plane..
INFO[0018] [etcd] Successfully started [etcd] container on host [x.x.x.x]
INFO[0020] [etcd] Successfully started [rke-log-linker] container on host [x.x.x.x]
INFO[0021] [remove/rke-log-linker] Successfully removed container on host [x.x.x.x]
INFO[0022] [etcd] Successfully started [etcd] container on host [y.y.y.y]
INFO[0023] [etcd] Successfully started [rke-log-linker] container on host [y.y.y.y]
INFO[0025] [remove/rke-log-linker] Successfully removed container on host [y.y.y.y]
INFO[0025] [etcd] Successfully started [etcd] container on host [z.z.z.z]
INFO[0027] [etcd] Successfully started [rke-log-linker] container on host [z.z.z.z]
INFO[0027] [remove/rke-log-linker] Successfully removed container on host [z.z.z.z]
INFO[0027] [etcd] Successfully started etcd plane..
INFO[0027] Finished restoring on all etcd hosts
```
## License
Copyright (c) 2018 [Rancher Labs, Inc.](http://rancher.com)

View File

@ -228,3 +228,21 @@ func fetchBackupCertificates(ctx context.Context, backupHosts []*hosts.Host, kub
// reporting the last error only.
return nil, err
}
// fetchCertificatesFromEtcd retrieves the kubernetes node client certificate
// and key from the etcd hosts, trying each host in turn and stopping at the
// first one that yields certificates. Returns the PEM-encoded certificate and
// key, or an error if no host produced certificates.
func fetchCertificatesFromEtcd(ctx context.Context, kubeCluster *Cluster) ([]byte, []byte, error) {
	certificates := map[string]pki.CertificatePKI{}
	var err error
	for _, etcdHost := range kubeCluster.EtcdHosts {
		certificates, err = pki.FetchCertificatesFromHost(ctx, kubeCluster.EtcdHosts, etcdHost, kubeCluster.SystemImages.Alpine, kubeCluster.LocalKubeConfigPath, kubeCluster.PrivateRegistriesMap)
		if certificates != nil {
			// Got certificates from this host; no need to try the rest.
			break
		}
	}
	if err != nil || certificates == nil {
		// NOTE: only the error from the last attempted host is reported.
		return nil, nil, fmt.Errorf("Failed to fetch certificates from etcd hosts: %v", err)
	}
	nodeCert := certificates[pki.KubeNodeCertName]
	clientCert := cert.EncodeCertPEM(nodeCert.Certificate)
	clientkey := cert.EncodePrivateKeyPEM(nodeCert.Key)
	return clientCert, clientkey, nil
}

View File

@ -77,7 +77,12 @@ func (c *Cluster) DeployControlPlane(ctx context.Context) error {
if len(c.Services.Etcd.ExternalURLs) > 0 {
log.Infof(ctx, "[etcd] External etcd connection string has been specified, skipping etcd plane")
} else {
if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine); err != nil {
etcdBackup := services.EtcdBackup{
Backup: c.Services.Etcd.Backup,
Creation: c.Services.Etcd.Creation,
Retention: c.Services.Etcd.Retention,
}
if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine, etcdBackup); err != nil {
return fmt.Errorf("[etcd] Failed to bring up Etcd Plane: %v", err)
}
}

View File

@ -28,6 +28,8 @@ const (
DefaultNetworkCloudProvider = "none"
DefaultIngressController = "nginx"
DefaultEtcdBackupCreationPeriod = "5m0s"
DefaultEtcdBackupRetentionPeriod = "24h"
)
func setDefaultIfEmptyMapValue(configMap map[string]string, key string, value string) {
@ -105,6 +107,8 @@ func (c *Cluster) setClusterServicesDefaults() {
&c.Services.Kubelet.Image: c.SystemImages.Kubernetes,
&c.Services.Kubeproxy.Image: c.SystemImages.Kubernetes,
&c.Services.Etcd.Image: c.SystemImages.Etcd,
&c.Services.Etcd.Creation: DefaultEtcdBackupCreationPeriod,
&c.Services.Etcd.Retention: DefaultEtcdBackupRetentionPeriod,
}
for k, v := range serviceConfigDefaultsMap {
setDefaultIfEmpty(k, v)

63
cluster/etcd.go Normal file
View File

@ -0,0 +1,63 @@
package cluster
import (
"context"
"fmt"
"path"
"github.com/rancher/rke/docker"
"github.com/rancher/rke/hosts"
"github.com/rancher/rke/services"
"github.com/rancher/types/apis/management.cattle.io/v3"
)
// BackupEtcd takes a one-time named snapshot of etcd on every etcd host in
// the cluster, stopping at the first host that fails.
func (c *Cluster) BackupEtcd(ctx context.Context, backupName string) error {
	for _, etcdHost := range c.EtcdHosts {
		err := services.RunEtcdBackup(ctx, etcdHost, c.PrivateRegistriesMap, c.SystemImages.Alpine, c.Services.Etcd.Creation, c.Services.Etcd.Retention, backupName, true)
		if err != nil {
			return err
		}
	}
	return nil
}
// RestoreEtcdBackup replaces the running etcd cluster with one restored from
// the given backup: it tears down the current etcd containers on all hosts,
// restores the snapshot on each etcd host, and then redeploys the etcd plane.
func (c *Cluster) RestoreEtcdBackup(ctx context.Context, backupPath string) error {
	// Stop and clean up the existing etcd containers on every etcd host.
	for _, etcdHost := range c.EtcdHosts {
		if err := tearDownOldEtcd(ctx, etcdHost, c.SystemImages.Alpine, c.PrivateRegistriesMap); err != nil {
			return err
		}
	}
	// Restore the snapshot on each etcd host, all joining the same cluster.
	initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
	for _, etcdHost := range c.EtcdHosts {
		if err := services.RestoreEtcdBackup(ctx, etcdHost, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupPath, initCluster); err != nil {
			return fmt.Errorf("[etcd] Failed to restore etcd backup: %v", err)
		}
	}
	// Rebuild the node plan for the etcd hosts and bring the plane back up.
	etcdNodePlanMap := make(map[string]v3.RKEConfigNodePlan)
	for _, etcdHost := range c.EtcdHosts {
		etcdNodePlanMap[etcdHost.Address] = BuildRKEConfigNodePlan(ctx, c, etcdHost, etcdHost.DockerInfo)
	}
	etcdBackup := services.EtcdBackup{
		Backup:    c.Services.Etcd.Backup,
		Creation:  c.Services.Etcd.Creation,
		Retention: c.Services.Etcd.Retention,
	}
	if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine, etcdBackup); err != nil {
		return fmt.Errorf("[etcd] Failed to bring up Etcd Plane: %v", err)
	}
	return nil
}
// tearDownOldEtcd removes the etcd container on the host and wipes its etcd
// data directory so a restored snapshot can take its place.
func tearDownOldEtcd(ctx context.Context, host *hosts.Host, cleanupImage string, prsMap map[string]v3.PrivateRegistry) error {
	if err := docker.DoRemoveContainer(ctx, host.DClient, services.EtcdContainerName, host.Address); err != nil {
		return fmt.Errorf("[etcd] Failed to stop old etcd containers: %v", err)
	}
	// Wipe the etcd data directory using a cleaner container.
	etcdDataDir := path.Join(host.PrefixPath, hosts.ToCleanEtcdDir)
	return host.CleanUp(ctx, []string{etcdDataDir}, cleanupImage, prsMap)
}

View File

@ -593,7 +593,7 @@ func (c *Cluster) BuildEtcdProcess(host *hosts.Host, etcdHosts []*hosts.Host, pr
}
Binds := []string{
fmt.Sprintf("%s:/var/lib/rancher/etcd:z", path.Join(prefixPath, "/var/lib/etcd")),
fmt.Sprintf("%s:/var/lib/rancher/:z", path.Join(prefixPath, "/var/lib/")),
fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(prefixPath, "/etc/kubernetes")),
}

136
cmd/etcd.go Normal file
View File

@ -0,0 +1,136 @@
package cmd
import (
"context"
"fmt"
"github.com/rancher/rke/cluster"
"github.com/rancher/rke/hosts"
"github.com/rancher/rke/log"
"github.com/rancher/rke/pki"
"github.com/rancher/types/apis/management.cattle.io/v3"
"github.com/urfave/cli"
)
// EtcdCommand returns the "etcd" CLI command with its "backup" and "restore"
// subcommands; both share the same name/config/ssh flag set.
func EtcdCommand() cli.Command {
	flags := []cli.Flag{
		cli.StringFlag{
			Name:  "name",
			Usage: "Specify Backup name",
		},
		cli.StringFlag{
			Name:   "config",
			Usage:  "Specify an alternate cluster YAML file",
			Value:  pki.ClusterConfig,
			EnvVar: "RKE_CONFIG",
		},
	}
	flags = append(flags, sshCliOptions...)
	backupCmd := cli.Command{
		Name:   "backup",
		Usage:  "Take backup on all etcd hosts",
		Flags:  flags,
		Action: BackupEtcdHostsFromCli,
	}
	restoreCmd := cli.Command{
		Name:   "restore",
		Usage:  "Restore existing backup",
		Flags:  flags,
		Action: RestoreEtcdBackupFromCli,
	}
	return cli.Command{
		Name:        "etcd",
		Usage:       "etcd backup/restore operations in k8s cluster",
		Subcommands: []cli.Command{backupCmd, restoreCmd},
	}
}
// BackupEtcdHosts parses the cluster configuration, tunnels to the cluster
// hosts, and takes a named etcd backup on every etcd host.
func BackupEtcdHosts(
	ctx context.Context,
	rkeConfig *v3.RancherKubernetesEngineConfig,
	dockerDialerFactory hosts.DialerFactory,
	configDir, backupName string) error {
	log.Infof(ctx, "Starting Backup on etcd hosts")
	kubeCluster, err := cluster.ParseCluster(ctx, rkeConfig, clusterFilePath, configDir, dockerDialerFactory, nil, nil)
	if err != nil {
		return err
	}
	if err = kubeCluster.TunnelHosts(ctx, false); err != nil {
		return err
	}
	if err = kubeCluster.BackupEtcd(ctx, backupName); err != nil {
		return err
	}
	log.Infof(ctx, "Finished backup on all etcd hosts")
	return nil
}
// RestoreEtcdBackup parses the cluster configuration, tunnels to the cluster
// hosts, and restores the named etcd backup across the etcd hosts.
func RestoreEtcdBackup(
	ctx context.Context,
	rkeConfig *v3.RancherKubernetesEngineConfig,
	dockerDialerFactory hosts.DialerFactory,
	configDir, backupName string) error {
	log.Infof(ctx, "Starting restore on etcd hosts")
	kubeCluster, err := cluster.ParseCluster(ctx, rkeConfig, clusterFilePath, configDir, dockerDialerFactory, nil, nil)
	if err != nil {
		return err
	}
	if err = kubeCluster.TunnelHosts(ctx, false); err != nil {
		return err
	}
	if err = kubeCluster.RestoreEtcdBackup(ctx, backupName); err != nil {
		return err
	}
	log.Infof(ctx, "Finished restoring on all etcd hosts")
	return nil
}
// BackupEtcdHostsFromCli is the CLI action for "rke etcd backup": it resolves
// and parses the cluster file, applies CLI overrides, and runs the backup.
func BackupEtcdHostsFromCli(ctx *cli.Context) error {
	clusterFile, filePath, err := resolveClusterFile(ctx)
	if err != nil {
		return fmt.Errorf("Failed to resolve cluster file: %v", err)
	}
	clusterFilePath = filePath
	rkeConfig, err := cluster.ParseConfig(clusterFile)
	if err != nil {
		return fmt.Errorf("Failed to parse cluster file: %v", err)
	}
	if rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig); err != nil {
		return err
	}
	return BackupEtcdHosts(context.Background(), rkeConfig, nil, "", ctx.String("name"))
}
// RestoreEtcdBackupFromCli is the CLI action for "rke etcd restore": it
// resolves and parses the cluster file, applies CLI overrides, and restores
// the named backup.
func RestoreEtcdBackupFromCli(ctx *cli.Context) error {
	clusterFile, filePath, err := resolveClusterFile(ctx)
	if err != nil {
		return fmt.Errorf("Failed to resolve cluster file: %v", err)
	}
	clusterFilePath = filePath
	rkeConfig, err := cluster.ParseConfig(clusterFile)
	if err != nil {
		return fmt.Errorf("Failed to parse cluster file: %v", err)
	}
	if rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig); err != nil {
		return err
	}
	return RestoreEtcdBackup(context.Background(), rkeConfig, nil, "", ctx.String("name"))
}

View File

@ -266,23 +266,27 @@ func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname s
if err := StopContainer(ctx, dClient, hostname, oldContainerName); err != nil {
return err
}
if err := WaitForContainer(ctx, dClient, hostname, oldContainerName); err != nil {
if _, err := WaitForContainer(ctx, dClient, hostname, oldContainerName); err != nil {
return nil
}
return RenameContainer(ctx, dClient, hostname, oldContainerName, newContainerName)
}
func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error {
func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) (int64, error) {
// We capture the status exit code of the container
statusCh, errCh := dClient.ContainerWait(ctx, containerName, container.WaitConditionNotRunning)
select {
case err := <-errCh:
if err != nil {
return fmt.Errorf("Error waiting for container [%s] on host [%s]: %v", containerName, hostname, err)
// if error is present return 1 exit code
return 1, fmt.Errorf("Error waiting for container [%s] on host [%s]: %v", containerName, hostname, err)
}
case <-statusCh:
case status := <-statusCh:
// return the status exit code of the container
return status.StatusCode, nil
}
return nil
return 0, nil
}
func IsContainerUpgradable(ctx context.Context, dClient *client.Client, imageCfg *container.Config, containerName string, hostname string, plane string) (bool, error) {

View File

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"path"
"path/filepath"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
@ -58,6 +59,7 @@ func (h *Host) CleanUpAll(ctx context.Context, cleanerImage string, prsMap map[s
path.Join(h.PrefixPath, ToCleanTempCertPath),
path.Join(h.PrefixPath, ToCleanCNILib),
}
if !externalEtcd {
toCleanPaths = append(toCleanPaths, path.Join(h.PrefixPath, ToCleanEtcdDir))
}
@ -116,7 +118,7 @@ func (h *Host) CleanUp(ctx context.Context, toCleanPaths []string, cleanerImage
return err
}
if err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName); err != nil {
if _, err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName); err != nil {
return err
}
@ -240,8 +242,12 @@ func buildCleanerConfig(host *Host, toCleanDirs []string, cleanerImage string) (
Cmd: cmd,
}
bindMounts := []string{}
bindMountsMap := make(map[string]string)
for _, vol := range toCleanDirs {
bindMounts = append(bindMounts, fmt.Sprintf("%s:%s:z", vol, vol))
bindMountsMap[filepath.Dir(vol)] = vol
}
for dir := range bindMountsMap {
bindMounts = append(bindMounts, fmt.Sprintf("%s:%s:z", dir, dir))
}
hostCfg := &container.HostConfig{
Binds: bindMounts,

View File

@ -34,6 +34,7 @@ func mainErr() error {
cmd.RemoveCommand(),
cmd.VersionCommand(),
cmd.ConfigCommand(),
cmd.EtcdCommand(),
}
app.Flags = []cli.Flag{
cli.BoolFlag{

View File

@ -134,7 +134,7 @@ func FetchCertificatesFromHost(ctx context.Context, extraHosts []*hosts.Host, ho
for certName, config := range crtList {
certificate := CertificatePKI{}
crt, err := fetchFileFromHost(ctx, GetCertTempPath(certName), image, host, prsMap)
crt, err := FetchFileFromHost(ctx, GetCertTempPath(certName), image, host, prsMap)
// I will only exit with an error if it's not a not-found-error and this is not an etcd certificate
if err != nil && !strings.HasPrefix(certName, "kube-etcd") {
if strings.Contains(err.Error(), "no such file or directory") ||
@ -149,10 +149,10 @@ func FetchCertificatesFromHost(ctx context.Context, extraHosts []*hosts.Host, ho
tmpCerts[certName] = CertificatePKI{}
continue
}
key, err := fetchFileFromHost(ctx, GetKeyTempPath(certName), image, host, prsMap)
key, err := FetchFileFromHost(ctx, GetKeyTempPath(certName), image, host, prsMap)
if config {
config, err := fetchFileFromHost(ctx, GetConfigTempPath(certName), image, host, prsMap)
config, err := FetchFileFromHost(ctx, GetConfigTempPath(certName), image, host, prsMap)
if err != nil {
return nil, err
}
@ -179,7 +179,7 @@ func FetchCertificatesFromHost(ctx context.Context, extraHosts []*hosts.Host, ho
}
func fetchFileFromHost(ctx context.Context, filePath, image string, host *hosts.Host, prsMap map[string]v3.PrivateRegistry) (string, error) {
func FetchFileFromHost(ctx context.Context, filePath, image string, host *hosts.Host, prsMap map[string]v3.PrivateRegistry) (string, error) {
imageCfg := &container.Config{
Image: image,

View File

@ -2,21 +2,42 @@ package services
import (
"fmt"
"path"
"path/filepath"
"strings"
"time"
"context"
etcdclient "github.com/coreos/etcd/client"
"github.com/docker/docker/api/types/container"
"github.com/pkg/errors"
"github.com/rancher/rke/docker"
"github.com/rancher/rke/hosts"
"github.com/rancher/rke/log"
"github.com/rancher/rke/pki"
"github.com/rancher/types/apis/management.cattle.io/v3"
"github.com/sirupsen/logrus"
)
func RunEtcdPlane(ctx context.Context, etcdHosts []*hosts.Host, etcdNodePlanMap map[string]v3.RKEConfigNodePlan, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, updateWorkersOnly bool, alpineImage string) error {
// EtcdBackup holds the etcd backup service settings handed to the etcd plane.
type EtcdBackup struct {
	// Backup enables or disables etcd backup creation.
	Backup bool
	// Creation is the period at which etcd backups are created.
	Creation string
	// Retention is the period etcd backups are kept before being purged.
	Retention string
}
func RunEtcdPlane(
ctx context.Context,
etcdHosts []*hosts.Host,
etcdNodePlanMap map[string]v3.RKEConfigNodePlan,
localConnDialerFactory hosts.DialerFactory,
prsMap map[string]v3.PrivateRegistry,
updateWorkersOnly bool,
alpineImage string,
etcdBackup EtcdBackup) error {
log.Infof(ctx, "[%s] Building up etcd plane..", ETCDRole)
for _, host := range etcdHosts {
if updateWorkersOnly {
@ -27,6 +48,11 @@ func RunEtcdPlane(ctx context.Context, etcdHosts []*hosts.Host, etcdNodePlanMap
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, EtcdContainerName, host.Address, ETCDRole, prsMap); err != nil {
return err
}
if etcdBackup.Backup {
if err := RunEtcdBackup(ctx, host, prsMap, alpineImage, etcdBackup.Creation, etcdBackup.Retention, EtcdBackupContainerName, false); err != nil {
return err
}
}
if err := createLogLink(ctx, host, EtcdContainerName, ETCDRole, alpineImage, prsMap); err != nil {
return err
}
@ -186,3 +212,84 @@ func IsEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*hosts.
}
return false, nil
}
// RunEtcdBackup starts a backup container on the etcd host. When once is
// true it takes a single named snapshot, waits for the container to exit,
// checks its exit code, and removes it; otherwise it leaves a rolling-backup
// service container running with the given creation and retention periods.
func RunEtcdBackup(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdBackupImage string, creation, retention, name string, once bool) error {
	log.Infof(ctx, "[etcd] Starting backup on host [%s]", etcdHost.Address)
	imageCfg := &container.Config{
		Cmd: []string{
			"/opt/rke/rke-etcd-backup",
			"rolling-backup",
			"--cacert", pki.GetCertPath(pki.CACertName),
			"--cert", pki.GetCertPath(pki.KubeNodeCertName),
			"--key", pki.GetKeyPath(pki.KubeNodeCertName),
			"--name", name,
		},
		Image: etcdBackupImage,
	}
	if once {
		imageCfg.Cmd = append(imageCfg.Cmd, "--once")
	} else {
		// Rolling mode honors the configured retention/creation periods.
		imageCfg.Cmd = append(imageCfg.Cmd, "--retention="+retention)
		imageCfg.Cmd = append(imageCfg.Cmd, "--creation="+creation)
	}
	hostCfg := &container.HostConfig{
		Binds: []string{
			"/opt/rke/etcdbackup:/backup",
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
		NetworkMode: container.NetworkMode("host"),
	}
	if once {
		if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdBackupOnceContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
			return err
		}
		status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdBackupOnceContainerName)
		if status != 0 || err != nil {
			// status is an int64 exit code, so %d (was %s, which printed garbage).
			return fmt.Errorf("Failed to take etcd backup exit code [%d]: %v", status, err)
		}
		return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdBackupOnceContainerName)
	}
	return docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdBackupContainerName, etcdHost.Address, ETCDRole, prsMap)
}
// RestoreEtcdBackup runs an "etcdctl snapshot restore" container on the etcd
// host, restoring the named backup from /opt/rke/etcdbackup into the etcd
// data directory. It waits for the container, checks its exit code, and
// removes it when the restore succeeds.
func RestoreEtcdBackup(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdRestoreImage, backupName, initCluster string) error {
	log.Infof(ctx, "[etcd] Restoring [%s] snapshot on etcd host [%s]", backupName, etcdHost.Address)
	nodeName := pki.GetEtcdCrtName(etcdHost.InternalAddress)
	backupPath := filepath.Join("/backup/", backupName)
	restoreCmd := []string{
		"/usr/local/bin/etcdctl",
		"--cacert", pki.GetCertPath(pki.CACertName),
		"--cert", pki.GetCertPath(nodeName),
		"--key", pki.GetKeyPath(nodeName),
		"snapshot", "restore", backupPath,
		"--data-dir=/var/lib/rancher/etcd",
		"--name=etcd-" + etcdHost.HostnameOverride,
		"--initial-cluster=" + initCluster,
		"--initial-cluster-token=etcd-cluster-1",
		"--initial-advertise-peer-urls=https://" + etcdHost.InternalAddress + ":2380",
	}
	imageCfg := &container.Config{
		Cmd: restoreCmd,
		// etcdctl v3 API is required for "snapshot restore".
		Env:   []string{"ETCDCTL_API=3"},
		Image: etcdRestoreImage,
	}
	hostCfg := &container.HostConfig{
		Binds: []string{
			"/opt/rke/etcdbackup:/backup:z",
			fmt.Sprintf("%s:/var/lib/rancher/:z", path.Join(etcdHost.PrefixPath, "/var/lib/")),
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
		NetworkMode: container.NetworkMode("host"),
	}
	if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdRestoreContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
		return err
	}
	status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName)
	if err != nil {
		return err
	}
	if status != 0 {
		return fmt.Errorf("Failed to run etcd restore container, exit status is: %d", status)
	}
	return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName)
}

View File

@ -27,6 +27,9 @@ const (
KubeControllerContainerName = "kube-controller-manager"
SchedulerContainerName = "kube-scheduler"
EtcdContainerName = "etcd"
EtcdBackupContainerName = "etcd-backup"
EtcdBackupOnceContainerName = "etcd-backup-once"
EtcdRestoreContainerName = "etcd-restore"
NginxProxyContainerName = "nginx-proxy"
SidekickContainerName = "service-sidekick"
LogLinkContainerName = "rke-log-linker"