
Merge pull request #548 from galal-hussein/etcd_backup

etcd backup/restore
This commit is contained in:
Alena Prokharchyk 2018-05-09 10:48:34 -07:00 committed by GitHub
commit 47ddb6ee41
18 changed files with 535 additions and 27 deletions

README.md

@@ -363,6 +363,7 @@ nodes:
```
## Deploying Rancher 2.0 using rke
Using RKE's pluggable user addons, it's possible to deploy Rancher 2.0 server in HA with a single command.
Depending on how you want to manage your SSL certificates, there are two deployment options:
@@ -416,6 +417,122 @@ kubectl -n cattle-system scale deployment cattle --replicas=3
# chown <user> /var/run/docker.sock
```
## Etcd Backup and Restoration
You can configure a Rancher Kubernetes Engine (RKE) cluster to automatically create backups of etcd. In a disaster scenario, you can restore these backups, which are stored on other cluster nodes.
### Etcd Regular Backup
To schedule a recurring automatic etcd backup, enable the `etcd-backup` service. `etcd-backup` runs in a service container alongside the `etcd` container, automatically creating backups and storing them on its local disk.
To enable `etcd-backup` in the RKE CLI, configure the following three variables:
```
services:
etcd:
backup: true
creation: 5m0s
retention: 24h
```
- `backup`: Enables/disables etcd backups in the RKE cluster.
Default value: `false`.
- `creation`: Interval at which `etcd-backup` creates and stores local backups.
Default value: `5m0s`.
- `retention`: Time period before an etcd backup expires. Expired backups are purged.
Default value: `24h`.
Both `creation` and `retention` take Go-style duration strings; see the sketch below.
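As a quick sanity check before committing a value to `cluster.yml`, a duration string can be validated with Go's standard `time.ParseDuration`. This is a minimal standalone sketch for illustration only, not part of RKE:
```
package main

import (
	"fmt"
	"time"
)

func main() {
	// Candidate values for the creation/retention fields.
	for _, v := range []string{"5m0s", "24h", "12h30m"} {
		d, err := time.ParseDuration(v)
		if err != nil {
			fmt.Printf("%q is not a valid duration: %v\n", v, err)
			continue
		}
		fmt.Printf("%q parses to %s\n", v, d)
	}
}
```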
After RKE runs, view the `etcd-backup` logs to confirm backups are being created automatically:
```
# docker logs etcd-backup
time="2018-05-04T18:39:16Z" level=info msg="Initializing Rolling Backups" creation=1m0s retention=24h0m0s
time="2018-05-04T18:40:16Z" level=info msg="Created backup" name="2018-05-04T18:40:16Z_etcd" runtime=108.332814ms
time="2018-05-04T18:41:16Z" level=info msg="Created backup" name="2018-05-04T18:41:16Z_etcd" runtime=92.880112ms
time="2018-05-04T18:42:16Z" level=info msg="Created backup" name="2018-05-04T18:42:16Z_etcd" runtime=83.67642ms
time="2018-05-04T18:43:16Z" level=info msg="Created backup" name="2018-05-04T18:43:16Z_etcd" runtime=86.298499ms
```
Backups are saved to the following directory: `/opt/rke/etcdbackup/`. Backups are created on each node that runs etcd.
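Each backup's name embeds its creation timestamp in RFC 3339 format, as in the log output above, which is what makes time-based retention possible. As an illustration of the idea only (a hypothetical sketch, not RKE's actual implementation), pruning expired backups from that directory could look like:
```
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"
)

// pruneBackups removes entries in dir whose embedded timestamp is older
// than the retention period. Names look like "2018-05-04T18:40:16Z_etcd",
// as in the etcd-backup logs above.
func pruneBackups(dir string, retention time.Duration) error {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return err
	}
	cutoff := time.Now().Add(-retention)
	for _, e := range entries {
		ts := strings.TrimSuffix(e.Name(), "_etcd")
		created, err := time.Parse(time.RFC3339, ts)
		if err != nil {
			continue // skip files that don't match the naming scheme
		}
		if created.Before(cutoff) {
			if err := os.RemoveAll(filepath.Join(dir, e.Name())); err != nil {
				return err
			}
			fmt.Println("pruned", e.Name())
		}
	}
	return nil
}

func main() {
	if err := pruneBackups("/opt/rke/etcdbackup", 24*time.Hour); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```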
### Etcd One-time Snapshots
RKE also adds two commands for etcd backup management:
```
./rke etcd backup [NAME]
```
and
```
./rke etcd restore [NAME]
```
The backup command saves a snapshot of etcd in `/opt/rke/etcdbackup`. It runs the backup in a dedicated container, which is removed once the backup completes.
```
# ./rke etcd backup --name snapshot
INFO[0000] Starting Backup on etcd hosts
INFO[0000] [dialer] Setup tunnel for host [x.x.x.x]
INFO[0002] [dialer] Setup tunnel for host [y.y.y.y]
INFO[0004] [dialer] Setup tunnel for host [z.z.z.z]
INFO[0006] [etcd] Starting backup on host [x.x.x.x]
INFO[0007] [etcd] Successfully started [etcd-backup-once] container on host [x.x.x.x]
INFO[0007] [etcd] Starting backup on host [y.y.y.y]
INFO[0009] [etcd] Successfully started [etcd-backup-once] container on host [y.y.y.y]
INFO[0010] [etcd] Starting backup on host [z.z.z.z]
INFO[0011] [etcd] Successfully started [etcd-backup-once] container on host [z.z.z.z]
INFO[0011] Finished backup on all etcd hosts
```
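Under the hood, the one-time backup follows a run-wait-remove pattern: RKE starts the `etcd-backup-once` container, waits for it to exit, checks the exit code, and removes it (see the `RunEtcdBackup` changes to `services/etcd.go` further down in this diff). A condensed sketch of that pattern against the Docker client, with RKE's wrappers and error handling trimmed:
```
package main

import (
	"context"
	"fmt"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/client"
)

// runOnce starts an already-created container, waits for it to exit,
// and removes it, failing on a non-zero exit code.
func runOnce(ctx context.Context, cli *client.Client, id string) error {
	if err := cli.ContainerStart(ctx, id, types.ContainerStartOptions{}); err != nil {
		return err
	}
	statusCh, errCh := cli.ContainerWait(ctx, id, container.WaitConditionNotRunning)
	select {
	case err := <-errCh:
		return err
	case status := <-statusCh:
		if status.StatusCode != 0 {
			return fmt.Errorf("backup container exited with code %d", status.StatusCode)
		}
	}
	return cli.ContainerRemove(ctx, id, types.ContainerRemoveOptions{Force: true})
}

func main() {
	cli, err := client.NewClientWithOpts(client.FromEnv)
	if err != nil {
		panic(err)
	}
	if err := runOnce(context.Background(), cli, "etcd-backup-once"); err != nil {
		fmt.Println(err)
	}
}
```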
### Etcd Disaster Recovery
`etcd restore` is used for etcd disaster recovery; it reverts the cluster to any snapshot stored in `/opt/rke/etcdbackup` that you explicitly specify. When you run `etcd restore`, RKE removes the old etcd container if it still exists, then creates a new etcd cluster from the snapshot you choose.
>**Warning:** Restoring an etcd backup deletes your current etcd cluster and replaces it with a new one. Before you run the `etcd restore` command, back up any important data in your current cluster.
```
# ./rke etcd restore --name snapshot --config test-aws.yml
INFO[0000] Starting restore on etcd hosts
INFO[0000] [dialer] Setup tunnel for host [x.x.x.x]
INFO[0002] [dialer] Setup tunnel for host [y.y.y.y]
INFO[0005] [dialer] Setup tunnel for host [z.z.z.z]
INFO[0007] [hosts] Cleaning up host [x.x.x.x]
INFO[0007] [hosts] Running cleaner container on host [x.x.x.x]
INFO[0008] [kube-cleaner] Successfully started [kube-cleaner] container on host [x.x.x.x]
INFO[0008] [hosts] Removing cleaner container on host [x.x.x.x]
INFO[0008] [hosts] Successfully cleaned up host [x.x.x.x]
INFO[0009] [hosts] Cleaning up host [y.y.y.y]
INFO[0009] [hosts] Running cleaner container on host [y.y.y.y]
INFO[0010] [kube-cleaner] Successfully started [kube-cleaner] container on host [y.y.y.y]
INFO[0010] [hosts] Removing cleaner container on host [y.y.y.y]
INFO[0010] [hosts] Successfully cleaned up host [y.y.y.y]
INFO[0011] [hosts] Cleaning up host [z.z.z.z]
INFO[0011] [hosts] Running cleaner container on host [z.z.z.z]
INFO[0012] [kube-cleaner] Successfully started [kube-cleaner] container on host [z.z.z.z]
INFO[0012] [hosts] Removing cleaner container on host [z.z.z.z]
INFO[0012] [hosts] Successfully cleaned up host [z.z.z.z]
INFO[0012] [etcd] Restoring [snapshot] snapshot on etcd host [x.x.x.x]
INFO[0013] [etcd] Successfully started [etcd-restore] container on host [x.x.x.x]
INFO[0014] [etcd] Restoring [snapshot] snapshot on etcd host [y.y.y.y]
INFO[0015] [etcd] Successfully started [etcd-restore] container on host [y.y.y.y]
INFO[0015] [etcd] Restoring [snapshot] snapshot on etcd host [z.z.z.z]
INFO[0016] [etcd] Successfully started [etcd-restore] container on host [z.z.z.z]
INFO[0017] [etcd] Building up etcd plane..
INFO[0018] [etcd] Successfully started [etcd] container on host [x.x.x.x]
INFO[0020] [etcd] Successfully started [rke-log-linker] container on host [x.x.x.x]
INFO[0021] [remove/rke-log-linker] Successfully removed container on host [x.x.x.x]
INFO[0022] [etcd] Successfully started [etcd] container on host [y.y.y.y]
INFO[0023] [etcd] Successfully started [rke-log-linker] container on host [y.y.y.y]
INFO[0025] [remove/rke-log-linker] Successfully removed container on host [y.y.y.y]
INFO[0025] [etcd] Successfully started [etcd] container on host [z.z.z.z]
INFO[0027] [etcd] Successfully started [rke-log-linker] container on host [z.z.z.z]
INFO[0027] [remove/rke-log-linker] Successfully removed container on host [z.z.z.z]
INFO[0027] [etcd] Successfully started etcd plane..
INFO[0027] Finished restoring on all etcd hosts
```
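Note that every restore container above receives the same `--initial-cluster` value for `etcdctl snapshot restore`, so all members agree on the new cluster topology. RKE builds this string with `services.GetEtcdInitialCluster`; going by the `--name=etcd-<hostname>` and port-2380 peer URL convention in the `RestoreEtcdBackup` code later in this diff, an equivalent sketch (illustrative only, not the actual helper) looks like:
```
package main

import (
	"fmt"
	"strings"
)

// initialCluster builds an etcdctl --initial-cluster value from
// hostname -> internal-address pairs, following the "etcd-<hostname>"
// member naming and ":2380" peer URL convention used above.
func initialCluster(members map[string]string) string {
	parts := make([]string, 0, len(members))
	for hostname, addr := range members {
		parts = append(parts, fmt.Sprintf("etcd-%s=https://%s:2380", hostname, addr))
	}
	return strings.Join(parts, ",")
}

func main() {
	fmt.Println(initialCluster(map[string]string{
		"node-1": "x.x.x.x",
		"node-2": "y.y.y.y",
		"node-3": "z.z.z.z",
	}))
}
```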
## License
Copyright (c) 2018 [Rancher Labs, Inc.](http://rancher.com)


@@ -228,3 +228,21 @@ func fetchBackupCertificates(ctx context.Context, backupHosts []*hosts.Host, kub
	// reporting the last error only.
	return nil, err
}

func fetchCertificatesFromEtcd(ctx context.Context, kubeCluster *Cluster) ([]byte, []byte, error) {
	// Get kubernetes certificates from the etcd hosts
	certificates := map[string]pki.CertificatePKI{}
	var err error
	for _, host := range kubeCluster.EtcdHosts {
		certificates, err = pki.FetchCertificatesFromHost(ctx, kubeCluster.EtcdHosts, host, kubeCluster.SystemImages.Alpine, kubeCluster.LocalKubeConfigPath, kubeCluster.PrivateRegistriesMap)
		if certificates != nil {
			break
		}
	}
	if err != nil || certificates == nil {
		return nil, nil, fmt.Errorf("Failed to fetch certificates from etcd hosts: %v", err)
	}
	clientCert := cert.EncodeCertPEM(certificates[pki.KubeNodeCertName].Certificate)
	clientkey := cert.EncodePrivateKeyPEM(certificates[pki.KubeNodeCertName].Key)
	return clientCert, clientkey, nil
}


@@ -77,7 +77,12 @@ func (c *Cluster) DeployControlPlane(ctx context.Context) error {
	if len(c.Services.Etcd.ExternalURLs) > 0 {
		log.Infof(ctx, "[etcd] External etcd connection string has been specified, skipping etcd plane")
	} else {
		if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine); err != nil {
		etcdBackup := services.EtcdBackup{
			Backup:    c.Services.Etcd.Backup,
			Creation:  c.Services.Etcd.Creation,
			Retention: c.Services.Etcd.Retention,
		}
		if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine, etcdBackup); err != nil {
			return fmt.Errorf("[etcd] Failed to bring up Etcd Plane: %v", err)
		}
	}


@@ -27,7 +27,9 @@ const (
	DefaultNetworkPlugin             = "canal"
	DefaultNetworkCloudProvider      = "none"
	DefaultIngressController         = "nginx"
	DefaultEtcdBackupCreationPeriod  = "5m0s"
	DefaultEtcdBackupRetentionPeriod = "24h"
)
func setDefaultIfEmptyMapValue(configMap map[string]string, key string, value string) {
@@ -105,6 +107,8 @@ func (c *Cluster) setClusterServicesDefaults() {
		&c.Services.Kubelet.Image:   c.SystemImages.Kubernetes,
		&c.Services.Kubeproxy.Image: c.SystemImages.Kubernetes,
		&c.Services.Etcd.Image:      c.SystemImages.Etcd,
		&c.Services.Etcd.Creation:   DefaultEtcdBackupCreationPeriod,
		&c.Services.Etcd.Retention:  DefaultEtcdBackupRetentionPeriod,
	}
	for k, v := range serviceConfigDefaultsMap {
		setDefaultIfEmpty(k, v)

cluster/etcd.go (new file)

@@ -0,0 +1,63 @@
package cluster

import (
	"context"
	"fmt"
	"path"

	"github.com/rancher/rke/docker"
	"github.com/rancher/rke/hosts"
	"github.com/rancher/rke/services"
	"github.com/rancher/types/apis/management.cattle.io/v3"
)

func (c *Cluster) BackupEtcd(ctx context.Context, backupName string) error {
	for _, host := range c.EtcdHosts {
		if err := services.RunEtcdBackup(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Alpine, c.Services.Etcd.Creation, c.Services.Etcd.Retention, backupName, true); err != nil {
			return err
		}
	}
	return nil
}

func (c *Cluster) RestoreEtcdBackup(ctx context.Context, backupPath string) error {
	// Stopping all etcd containers
	for _, host := range c.EtcdHosts {
		if err := tearDownOldEtcd(ctx, host, c.SystemImages.Alpine, c.PrivateRegistriesMap); err != nil {
			return err
		}
	}
	// Start restore process on all etcd hosts
	initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
	for _, host := range c.EtcdHosts {
		if err := services.RestoreEtcdBackup(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupPath, initCluster); err != nil {
			return fmt.Errorf("[etcd] Failed to restore etcd backup: %v", err)
		}
	}
	// Deploy Etcd Plane
	etcdNodePlanMap := make(map[string]v3.RKEConfigNodePlan)
	// Build etcd node plan map
	for _, etcdHost := range c.EtcdHosts {
		etcdNodePlanMap[etcdHost.Address] = BuildRKEConfigNodePlan(ctx, c, etcdHost, etcdHost.DockerInfo)
	}
	etcdBackup := services.EtcdBackup{
		Backup:    c.Services.Etcd.Backup,
		Creation:  c.Services.Etcd.Creation,
		Retention: c.Services.Etcd.Retention,
	}
	if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine, etcdBackup); err != nil {
		return fmt.Errorf("[etcd] Failed to bring up Etcd Plane: %v", err)
	}
	return nil
}

func tearDownOldEtcd(ctx context.Context, host *hosts.Host, cleanupImage string, prsMap map[string]v3.PrivateRegistry) error {
	if err := docker.DoRemoveContainer(ctx, host.DClient, services.EtcdContainerName, host.Address); err != nil {
		return fmt.Errorf("[etcd] Failed to stop old etcd containers: %v", err)
	}
	// cleanup etcd data directory
	toCleanPaths := []string{
		path.Join(host.PrefixPath, hosts.ToCleanEtcdDir),
	}
	return host.CleanUp(ctx, toCleanPaths, cleanupImage, prsMap)
}


@@ -593,7 +593,7 @@ func (c *Cluster) BuildEtcdProcess(host *hosts.Host, etcdHosts []*hosts.Host, pr
	}
	Binds := []string{
		fmt.Sprintf("%s:/var/lib/rancher/etcd:z", path.Join(prefixPath, "/var/lib/etcd")),
		fmt.Sprintf("%s:/var/lib/rancher/:z", path.Join(prefixPath, "/var/lib/")),
		fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(prefixPath, "/etc/kubernetes")),
	}

cmd/etcd.go (new file)

@@ -0,0 +1,136 @@
package cmd

import (
	"context"
	"fmt"

	"github.com/rancher/rke/cluster"
	"github.com/rancher/rke/hosts"
	"github.com/rancher/rke/log"
	"github.com/rancher/rke/pki"
	"github.com/rancher/types/apis/management.cattle.io/v3"
	"github.com/urfave/cli"
)

func EtcdCommand() cli.Command {
	backupRestoreFlags := []cli.Flag{
		cli.StringFlag{
			Name:  "name",
			Usage: "Specify Backup name",
		},
		cli.StringFlag{
			Name:   "config",
			Usage:  "Specify an alternate cluster YAML file",
			Value:  pki.ClusterConfig,
			EnvVar: "RKE_CONFIG",
		},
	}
	backupRestoreFlags = append(backupRestoreFlags, sshCliOptions...)
	return cli.Command{
		Name:  "etcd",
		Usage: "etcd backup/restore operations in k8s cluster",
		Subcommands: []cli.Command{
			{
				Name:   "backup",
				Usage:  "Take backup on all etcd hosts",
				Flags:  backupRestoreFlags,
				Action: BackupEtcdHostsFromCli,
			},
			{
				Name:   "restore",
				Usage:  "Restore existing backup",
				Flags:  backupRestoreFlags,
				Action: RestoreEtcdBackupFromCli,
			},
		},
	}
}

func BackupEtcdHosts(
	ctx context.Context,
	rkeConfig *v3.RancherKubernetesEngineConfig,
	dockerDialerFactory hosts.DialerFactory,
	configDir, backupName string) error {
	log.Infof(ctx, "Starting Backup on etcd hosts")
	kubeCluster, err := cluster.ParseCluster(ctx, rkeConfig, clusterFilePath, configDir, dockerDialerFactory, nil, nil)
	if err != nil {
		return err
	}
	if err := kubeCluster.TunnelHosts(ctx, false); err != nil {
		return err
	}
	if err := kubeCluster.BackupEtcd(ctx, backupName); err != nil {
		return err
	}
	log.Infof(ctx, "Finished backup on all etcd hosts")
	return nil
}

func RestoreEtcdBackup(
	ctx context.Context,
	rkeConfig *v3.RancherKubernetesEngineConfig,
	dockerDialerFactory hosts.DialerFactory,
	configDir, backupName string) error {
	log.Infof(ctx, "Starting restore on etcd hosts")
	kubeCluster, err := cluster.ParseCluster(ctx, rkeConfig, clusterFilePath, configDir, dockerDialerFactory, nil, nil)
	if err != nil {
		return err
	}
	if err := kubeCluster.TunnelHosts(ctx, false); err != nil {
		return err
	}
	if err := kubeCluster.RestoreEtcdBackup(ctx, backupName); err != nil {
		return err
	}
	log.Infof(ctx, "Finished restoring on all etcd hosts")
	return nil
}

func BackupEtcdHostsFromCli(ctx *cli.Context) error {
	clusterFile, filePath, err := resolveClusterFile(ctx)
	if err != nil {
		return fmt.Errorf("Failed to resolve cluster file: %v", err)
	}
	clusterFilePath = filePath
	rkeConfig, err := cluster.ParseConfig(clusterFile)
	if err != nil {
		return fmt.Errorf("Failed to parse cluster file: %v", err)
	}
	rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig)
	if err != nil {
		return err
	}
	return BackupEtcdHosts(context.Background(), rkeConfig, nil, "", ctx.String("name"))
}

func RestoreEtcdBackupFromCli(ctx *cli.Context) error {
	clusterFile, filePath, err := resolveClusterFile(ctx)
	if err != nil {
		return fmt.Errorf("Failed to resolve cluster file: %v", err)
	}
	clusterFilePath = filePath
	rkeConfig, err := cluster.ParseConfig(clusterFile)
	if err != nil {
		return fmt.Errorf("Failed to parse cluster file: %v", err)
	}
	rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig)
	if err != nil {
		return err
	}
	return RestoreEtcdBackup(context.Background(), rkeConfig, nil, "", ctx.String("name"))
}


@@ -266,23 +266,27 @@ func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname s
	if err := StopContainer(ctx, dClient, hostname, oldContainerName); err != nil {
		return err
	}
	if err := WaitForContainer(ctx, dClient, hostname, oldContainerName); err != nil {
	if _, err := WaitForContainer(ctx, dClient, hostname, oldContainerName); err != nil {
		return nil
	}
	return RenameContainer(ctx, dClient, hostname, oldContainerName, newContainerName)
}

func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error {
func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) (int64, error) {
	// We capture the status exit code of the container
	statusCh, errCh := dClient.ContainerWait(ctx, containerName, container.WaitConditionNotRunning)
	select {
	case err := <-errCh:
		if err != nil {
			return fmt.Errorf("Error waiting for container [%s] on host [%s]: %v", containerName, hostname, err)
			// if error is present return 1 exit code
			return 1, fmt.Errorf("Error waiting for container [%s] on host [%s]: %v", containerName, hostname, err)
		}
	case <-statusCh:
	case status := <-statusCh:
		// return the status exit code of the container
		return status.StatusCode, nil
	}
	return nil
	return 0, nil
}
func IsContainerUpgradable(ctx context.Context, dClient *client.Client, imageCfg *container.Config, containerName string, hostname string, plane string) (bool, error) {


@@ -4,6 +4,7 @@ import (
	"context"
	"fmt"
	"path"
	"path/filepath"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"
@@ -58,6 +59,7 @@ func (h *Host) CleanUpAll(ctx context.Context, cleanerImage string, prsMap map[s
		path.Join(h.PrefixPath, ToCleanTempCertPath),
		path.Join(h.PrefixPath, ToCleanCNILib),
	}
	if !externalEtcd {
		toCleanPaths = append(toCleanPaths, path.Join(h.PrefixPath, ToCleanEtcdDir))
	}
@@ -116,7 +118,7 @@ func (h *Host) CleanUp(ctx context.Context, toCleanPaths []string, cleanerImage
		return err
	}
	if err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName); err != nil {
	if _, err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName); err != nil {
		return err
	}
@@ -240,8 +242,12 @@ func buildCleanerConfig(host *Host, toCleanDirs []string, cleanerImage string) (
		Cmd: cmd,
	}
	bindMounts := []string{}
	bindMountsMap := make(map[string]string)
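	// Deduplicate by each path's parent directory and bind-mount the parents
	// rather than the paths themselves: a bind-mounted path cannot be removed
	// from inside the container, but a child of a mounted parent can.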
	for _, vol := range toCleanDirs {
		bindMounts = append(bindMounts, fmt.Sprintf("%s:%s:z", vol, vol))
		bindMountsMap[filepath.Dir(vol)] = vol
	}
	for dir := range bindMountsMap {
		bindMounts = append(bindMounts, fmt.Sprintf("%s:%s:z", dir, dir))
	}
	hostCfg := &container.HostConfig{
		Binds: bindMounts,


@@ -34,6 +34,7 @@ func mainErr() error {
		cmd.RemoveCommand(),
		cmd.VersionCommand(),
		cmd.ConfigCommand(),
		cmd.EtcdCommand(),
	}
	app.Flags = []cli.Flag{
		cli.BoolFlag{


@@ -134,7 +134,7 @@ func FetchCertificatesFromHost(ctx context.Context, extraHosts []*hosts.Host, ho
	for certName, config := range crtList {
		certificate := CertificatePKI{}
		crt, err := fetchFileFromHost(ctx, GetCertTempPath(certName), image, host, prsMap)
		crt, err := FetchFileFromHost(ctx, GetCertTempPath(certName), image, host, prsMap)
		// I will only exit with an error if it's not a not-found-error and this is not an etcd certificate
		if err != nil && !strings.HasPrefix(certName, "kube-etcd") {
			if strings.Contains(err.Error(), "no such file or directory") ||
@@ -149,10 +149,10 @@ func FetchCertificatesFromHost(ctx context.Context, extraHosts []*hosts.Host, ho
			tmpCerts[certName] = CertificatePKI{}
			continue
		}
		key, err := fetchFileFromHost(ctx, GetKeyTempPath(certName), image, host, prsMap)
		key, err := FetchFileFromHost(ctx, GetKeyTempPath(certName), image, host, prsMap)
		if config {
			config, err := fetchFileFromHost(ctx, GetConfigTempPath(certName), image, host, prsMap)
			config, err := FetchFileFromHost(ctx, GetConfigTempPath(certName), image, host, prsMap)
			if err != nil {
				return nil, err
			}
@@ -179,7 +179,7 @@ func FetchCertificatesFromHost(ctx context.Context, extraHosts []*hosts.Host, ho
}

func fetchFileFromHost(ctx context.Context, filePath, image string, host *hosts.Host, prsMap map[string]v3.PrivateRegistry) (string, error) {
func FetchFileFromHost(ctx context.Context, filePath, image string, host *hosts.Host, prsMap map[string]v3.PrivateRegistry) (string, error) {
	imageCfg := &container.Config{
		Image: image,

@@ -2,21 +2,42 @@ package services

import (
	"fmt"
	"path"
	"path/filepath"
	"strings"
	"time"

	"context"

	etcdclient "github.com/coreos/etcd/client"
	"github.com/docker/docker/api/types/container"
	"github.com/pkg/errors"
	"github.com/rancher/rke/docker"
	"github.com/rancher/rke/hosts"
	"github.com/rancher/rke/log"
	"github.com/rancher/rke/pki"
	"github.com/rancher/types/apis/management.cattle.io/v3"
	"github.com/sirupsen/logrus"
)

func RunEtcdPlane(ctx context.Context, etcdHosts []*hosts.Host, etcdNodePlanMap map[string]v3.RKEConfigNodePlan, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, updateWorkersOnly bool, alpineImage string) error {
type EtcdBackup struct {
	// Enable or disable backup creation
	Backup bool
	// Creation period of the etcd backups
	Creation string
	// Retention period of the etcd backups
	Retention string
}

func RunEtcdPlane(
	ctx context.Context,
	etcdHosts []*hosts.Host,
	etcdNodePlanMap map[string]v3.RKEConfigNodePlan,
	localConnDialerFactory hosts.DialerFactory,
	prsMap map[string]v3.PrivateRegistry,
	updateWorkersOnly bool,
	alpineImage string,
	etcdBackup EtcdBackup) error {
	log.Infof(ctx, "[%s] Building up etcd plane..", ETCDRole)
	for _, host := range etcdHosts {
		if updateWorkersOnly {
@@ -27,6 +48,11 @@ func RunEtcdPlane(ctx context.Context, etcdHosts []*hosts.Host, etcdNodePlanMap
		if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, EtcdContainerName, host.Address, ETCDRole, prsMap); err != nil {
			return err
		}
		if etcdBackup.Backup {
			if err := RunEtcdBackup(ctx, host, prsMap, alpineImage, etcdBackup.Creation, etcdBackup.Retention, EtcdBackupContainerName, false); err != nil {
				return err
			}
		}
		if err := createLogLink(ctx, host, EtcdContainerName, ETCDRole, alpineImage, prsMap); err != nil {
			return err
		}
@@ -186,3 +212,84 @@ func IsEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*hosts.
	}
	return false, nil
}

func RunEtcdBackup(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdBackupImage string, creation, retention, name string, once bool) error {
	log.Infof(ctx, "[etcd] Starting backup on host [%s]", etcdHost.Address)
	imageCfg := &container.Config{
		Cmd: []string{
			"/opt/rke/rke-etcd-backup",
			"rolling-backup",
			"--cacert", pki.GetCertPath(pki.CACertName),
			"--cert", pki.GetCertPath(pki.KubeNodeCertName),
			"--key", pki.GetKeyPath(pki.KubeNodeCertName),
			"--name", name,
		},
		Image: etcdBackupImage,
	}
	if once {
		imageCfg.Cmd = append(imageCfg.Cmd, "--once")
	}
	if !once {
		imageCfg.Cmd = append(imageCfg.Cmd, "--retention="+retention)
		imageCfg.Cmd = append(imageCfg.Cmd, "--creation="+creation)
	}
	hostCfg := &container.HostConfig{
		Binds: []string{
			"/opt/rke/etcdbackup:/backup",
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
		NetworkMode: container.NetworkMode("host"),
	}
	if once {
		if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdBackupOnceContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
			return err
		}
		status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdBackupOnceContainerName)
		if status != 0 || err != nil {
			return fmt.Errorf("Failed to take etcd backup, exit code [%d]: %v", status, err)
		}
		return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdBackupOnceContainerName)
	}
	return docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdBackupContainerName, etcdHost.Address, ETCDRole, prsMap)
}

func RestoreEtcdBackup(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdRestoreImage, backupName, initCluster string) error {
	log.Infof(ctx, "[etcd] Restoring [%s] snapshot on etcd host [%s]", backupName, etcdHost.Address)
	nodeName := pki.GetEtcdCrtName(etcdHost.InternalAddress)
	backupPath := filepath.Join("/backup/", backupName)
	imageCfg := &container.Config{
		Cmd: []string{
			"/usr/local/bin/etcdctl",
			"--cacert", pki.GetCertPath(pki.CACertName),
			"--cert", pki.GetCertPath(nodeName),
			"--key", pki.GetKeyPath(nodeName),
			"snapshot", "restore", backupPath,
			"--data-dir=/var/lib/rancher/etcd",
			"--name=etcd-" + etcdHost.HostnameOverride,
			"--initial-cluster=" + initCluster,
			"--initial-cluster-token=etcd-cluster-1",
			"--initial-advertise-peer-urls=https://" + etcdHost.InternalAddress + ":2380",
		},
		Env:   []string{"ETCDCTL_API=3"},
		Image: etcdRestoreImage,
	}
	hostCfg := &container.HostConfig{
		Binds: []string{
			"/opt/rke/etcdbackup:/backup:z",
			fmt.Sprintf("%s:/var/lib/rancher/:z", path.Join(etcdHost.PrefixPath, "/var/lib/")),
			fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))},
		NetworkMode: container.NetworkMode("host"),
	}
	if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdRestoreContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
		return err
	}
	status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName)
	if err != nil {
		return err
	}
	if status != 0 {
		return fmt.Errorf("Failed to run etcd restore container, exit status is: %d", status)
	}
	return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName)
}


@@ -27,6 +27,9 @@ const (
	KubeControllerContainerName = "kube-controller-manager"
	SchedulerContainerName      = "kube-scheduler"
	EtcdContainerName           = "etcd"
	EtcdBackupContainerName     = "etcd-backup"
	EtcdBackupOnceContainerName = "etcd-backup-once"
	EtcdRestoreContainerName    = "etcd-restore"
	NginxProxyContainerName     = "nginx-proxy"
	SidekickContainerName       = "service-sidekick"
	LogLinkContainerName        = "rke-log-linker"


@@ -25,4 +25,4 @@ github.com/ugorji/go/codec ccfe18359b55b97855cee1d3f74e5efbda4869d
github.com/Microsoft/go-winio ab35fc04b6365e8fcb18e6e9e41ea4a02b10b175
github.com/rancher/norman ff60298f31f081b06d198815b4c178a578664f7d
github.com/rancher/types 6e722de69b9aac5396dc410497890c0fabe5f558
github.com/rancher/types a1ddf487a47ebd7b66039361f11ebfc93acfad69


@@ -130,17 +130,17 @@ type ActiveDirectoryConfig struct {
	ServiceAccountPassword string `json:"serviceAccountPassword,omitempty" norman:"type=password,required"`
	UserDisabledBitMask int64 `json:"userDisabledBitMask,omitempty" norman:"default=2"`
	UserSearchBase string `json:"userSearchBase,omitempty" norman:"required"`
	UserSearchAttribute string `json:"userSearchAttribute,omitempty" norman:"default=sAMAccountName"`
	UserLoginAttribute string `json:"userLoginAttribute,omitempty" norman:"default=sAMAccountName"`
	UserObjectClass string `json:"userObjectClass,omitempty" norman:"default=person"`
	UserNameAttribute string `json:"userNameAttribute,omitempty" norman:"default=name"`
	UserEnabledAttribute string `json:"userEnabledAttribute,omitempty" norman:"default=userAccountControl"`
	UserSearchAttribute string `json:"userSearchAttribute,omitempty" norman:"default=sAMAccountName|sn|givenName,required"`
	UserLoginAttribute string `json:"userLoginAttribute,omitempty" norman:"default=sAMAccountName,required"`
	UserObjectClass string `json:"userObjectClass,omitempty" norman:"default=person,required"`
	UserNameAttribute string `json:"userNameAttribute,omitempty" norman:"default=name,required"`
	UserEnabledAttribute string `json:"userEnabledAttribute,omitempty" norman:"default=userAccountControl,required"`
	GroupSearchBase string `json:"groupSearchBase,omitempty"`
	GroupSearchAttribute string `json:"groupSearchAttribute,omitempty" norman:"default=sAMAccountName"`
	GroupObjectClass string `json:"groupObjectClass,omitempty" norman:"default=group"`
	GroupNameAttribute string `json:"groupNameAttribute,omitempty" norman:"default=name"`
	GroupDNAttribute string `json:"groupDNAttribute,omitempty" norman:"default=distinguishedName"`
	GroupMemberUserAttribute string `json:"groupMemberUserAttribute,omitempty" norman:"default=distinguishedName"`
	GroupSearchAttribute string `json:"groupSearchAttribute,omitempty" norman:"default=sAMAccountName,required"`
	GroupObjectClass string `json:"groupObjectClass,omitempty" norman:"default=group,required"`
	GroupNameAttribute string `json:"groupNameAttribute,omitempty" norman:"default=name,required"`
	GroupDNAttribute string `json:"groupDNAttribute,omitempty" norman:"default=distinguishedName,required"`
	GroupMemberUserAttribute string `json:"groupMemberUserAttribute,omitempty" norman:"default=distinguishedName,required"`
	GroupMemberMappingAttribute string `json:"groupMemberMappingAttribute,omitempty"`
	ConnectionTimeout int64 `json:"connectionTimeout,omitempty" norman:"default=5000"`
}


@@ -8,7 +8,7 @@ const (
var (
	m = image.Mirror
	ToolsImage = m("rancher/rke-tools:v0.1.5")
	ToolsImage = m("rancher/rke-tools:v0.1.6")
	// K8sVersionToRKESystemImages - images map for 2.0
	K8sVersionToRKESystemImages = map[string]RKESystemImages{


@@ -37,6 +37,23 @@ type RancherKubernetesEngineConfig struct {
	PrefixPath string `yaml:"prefix_path" json:"prefixPath,omitempty"`
	// Number of status check retries for addon deployment jobs
	AddonJobRetries int `yaml:"addon_job_retries" json:"addonJobRetries,omitempty" norman:"default=5"`
	// Bastion/Jump Host configuration
	BastionHost BastionHost `yaml:"bastion_host" json:"bastionHost,omitempty"`
}

type BastionHost struct {
	// Address of Bastion Host
	Address string `yaml:"address" json:"address,omitempty"`
	// SSH Port of Bastion Host
	Port string `yaml:"port" json:"port,omitempty"`
	// ssh User to Bastion Host
	User string `yaml:"user" json:"user,omitempty"`
	// SSH Agent Auth enable
	SSHAgentAuth bool `yaml:"ssh_agent_auth,omitempty" json:"sshAgentAuth,omitempty"`
	// SSH Private Key
	SSHKey string `yaml:"ssh_key" json:"sshKey,omitempty"`
	// SSH Private Key Path
	SSHKeyPath string `yaml:"ssh_key_path" json:"sshKeyPath,omitempty"`
}

type PrivateRegistry struct {
@@ -154,6 +171,12 @@ type ETCDService struct {
	Key string `yaml:"key" json:"key,omitempty"`
	// External etcd prefix
	Path string `yaml:"path" json:"path,omitempty"`
	// Etcd Backup Service
	Backup bool `yaml:"backup" json:"backup,omitempty"`
	// Etcd Backup Retention period
	Retention string `yaml:"retention" json:"retention,omitempty"`
	// Etcd Backup Creation period
	Creation string `yaml:"creation" json:"creation,omitempty"`
}

type KubeAPIService struct {
type KubeAPIService struct {


@@ -88,6 +88,10 @@ func RegisterDeepCopies(scheme *runtime.Scheme) error {
		in.(*BaseService).DeepCopyInto(out.(*BaseService))
		return nil
	}, InType: reflect.TypeOf(&BaseService{})},
	conversion.GeneratedDeepCopyFunc{Fn: func(in interface{}, out interface{}, c *conversion.Cloner) error {
		in.(*BastionHost).DeepCopyInto(out.(*BastionHost))
		return nil
	}, InType: reflect.TypeOf(&BastionHost{})},
	conversion.GeneratedDeepCopyFunc{Fn: func(in interface{}, out interface{}, c *conversion.Cloner) error {
		in.(*CalicoNetworkProvider).DeepCopyInto(out.(*CalicoNetworkProvider))
		return nil
@@ -1326,6 +1330,22 @@ func (in *BaseService) DeepCopy() *BaseService {
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *BastionHost) DeepCopyInto(out *BastionHost) {
	*out = *in
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BastionHost.
func (in *BastionHost) DeepCopy() *BastionHost {
	if in == nil {
		return nil
	}
	out := new(BastionHost)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *CalicoNetworkProvider) DeepCopyInto(out *CalicoNetworkProvider) {
	*out = *in
@@ -5918,6 +5938,7 @@ func (in *RancherKubernetesEngineConfig) DeepCopyInto(out *RancherKubernetesEngi
	}
	in.Ingress.DeepCopyInto(&out.Ingress)
	in.CloudProvider.DeepCopyInto(&out.CloudProvider)
	out.BastionHost = in.BastionHost
	return
}