diff --git a/cluster/etcd.go b/cluster/etcd.go index 8a9aa782..41155f2f 100644 --- a/cluster/etcd.go +++ b/cluster/etcd.go @@ -28,6 +28,7 @@ func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error // local backup case var backupServer *hosts.Host backupImage := c.getBackupImage() + var errors []error if !util.IsRancherBackupSupported(c.SystemImages.Alpine) { log.Warnf(ctx, "Auto local backup sync is not supported in `%s`. Using `%s` instead.", c.SystemImages.Alpine, backupImage) } @@ -41,11 +42,22 @@ func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error if backupServer == nil { // start the download server, only one node should have it! if err := services.StartBackupServer(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotPath); err != nil { log.Warnf(ctx, "failed to start backup server on host [%s]: %v", host.Address, err) + errors = append(errors, err) continue } backupServer = host + break } } + + if backupServer == nil { //failed to start the backupServer, I will cleanup and exit + for _, host := range c.EtcdHosts { + if err := docker.StartContainer(ctx, host.DClient, host.Address, services.EtcdContainerName); err != nil { + log.Warnf(ctx, "failed to start etcd container on host [%s]: %v", host.Address, err) + } + } + return fmt.Errorf("failed to start backup server on all etcd nodes: %v", errors) + } // start downloading the snapshot for _, host := range c.EtcdHosts { if backupServer != nil && host.Address == backupServer.Address { // we skip the backup server if it's there diff --git a/services/etcd.go b/services/etcd.go index 29135858..10d06f30 100644 --- a/services/etcd.go +++ b/services/etcd.go @@ -16,7 +16,7 @@ import ( "github.com/rancher/rke/log" "github.com/rancher/rke/pki" "github.com/rancher/rke/util" - "github.com/rancher/types/apis/management.cattle.io/v3" + v3 "github.com/rancher/types/apis/management.cattle.io/v3" "github.com/sirupsen/logrus" "golang.org/x/sync/errgroup" 
"k8s.io/client-go/util/cert" @@ -511,14 +511,33 @@ func StartBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[str }, Image: etcdSnapshotImage, } + hostCfg := &container.HostConfig{ Binds: []string{ fmt.Sprintf("%s:/backup", EtcdSnapshotPath), fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(etcdHost.PrefixPath, "/etc/kubernetes"))}, NetworkMode: container.NetworkMode("host"), - RestartPolicy: container.RestartPolicy{Name: "on-failure"}, + RestartPolicy: container.RestartPolicy{Name: "no"}, } - return docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdServeBackupContainerName, etcdHost.Address, ETCDRole, prsMap) + if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdServeBackupContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil { + return err + } + container, err := docker.InspectContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdServeBackupContainerName) + if err != nil { + return err + } + if !container.State.Running { + containerLog, _, err := docker.GetContainerLogsStdoutStderr(ctx, etcdHost.DClient, EtcdServeBackupContainerName, "1", false) + if err != nil { + return err + } + if err := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdServeBackupContainerName); err != nil { + return err + } + // printing the restore container's logs + return fmt.Errorf("Failed to run backup server container, container logs: %s", containerLog) + } + return nil } func DownloadEtcdSnapshotFromBackupServer(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage, name string, backupServer *hosts.Host) error { diff --git a/util/util.go b/util/util.go index 4bdb65bd..4005a8ce 100644 --- a/util/util.go +++ b/util/util.go @@ -7,13 +7,14 @@ import ( "strings" "github.com/coreos/go-semver/semver" - "github.com/rancher/types/apis/management.cattle.io/v3" + v3 "github.com/rancher/types/apis/management.cattle.io/v3" ) const ( WorkerThreads = 50 - - 
SupportedSyncToolsVersion = "0.1.22" + // this should be kept at the latest version of rke released with + // rancher 2.2.0. + SupportedSyncToolsVersion = "0.1.25" ) func StrToSemVer(version string) (*semver.Version, error) { @@ -115,6 +116,11 @@ func GetDefaultRKETools() string { return v3.AllK8sVersions[v3.DefaultK8s].Alpine } +// with rancher 2.2.0 and rke 0.2.0, etcdbackup was completely refactored +// and the interface for the rke-tools backup command changed significantly. +// This function is used to check the released rke-tools version to choose +// between the new backup or the legacy backup code paths. +// The released version of rke-tools should be set in the const SupportedSyncToolsVersion func IsRancherBackupSupported(image string) bool { v := strings.Split(image, ":") last := v[len(v)-1]