1
0
mirror of https://github.com/rancher/rke.git synced 2025-09-09 02:51:15 +00:00

Added retry for snapshot and s3 upload, verify etcd running on host (#2952)

Added retry for snapshot and s3 upload, verify etcd running on host

Added option to quiet noisy container logs
This commit is contained in:
Jake Hyde
2022-06-23 18:30:58 -04:00
committed by GitHub
parent 51a850a1d0
commit 8aa6283dcd
8 changed files with 135 additions and 58 deletions

View File

@@ -557,7 +557,6 @@ func (c *Cluster) StoreAddonConfigMap(ctx context.Context, addonYaml string, add
timeout := make(chan bool, 1) timeout := make(chan bool, 1)
go func() { go func() {
for { for {
updated, err = k8s.UpdateConfigMap(kubeClient, []byte(addonYaml), addonName) updated, err = k8s.UpdateConfigMap(kubeClient, []byte(addonYaml), addonName)
if err != nil { if err != nil {
time.Sleep(time.Second * 5) time.Sleep(time.Second * 5)

View File

@@ -3,6 +3,7 @@ package cluster
import ( import (
"context" "context"
"fmt" "fmt"
"strings"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
@@ -17,16 +18,52 @@ import (
func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error { func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
backupImage := c.getBackupImage() backupImage := c.getBackupImage()
containerTimeout := DefaultEtcdBackupConfigTimeout
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
}
// store first error message
var snapshotErr error
snapshotFailures := 0
s3UploadFailures := 0
for _, host := range c.EtcdHosts { for _, host := range c.EtcdHosts {
containerTimeout := DefaultEtcdBackupConfigTimeout
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
}
newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout) newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout)
if err := services.RunEtcdSnapshotSave(newCtx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd, c.Version); err != nil { if err := services.RunEtcdSnapshotSave(newCtx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd, c.Version); err != nil {
return err if strings.Contains(err.Error(), "failed to upload etcd snapshot file to s3 on host") {
s3UploadFailures++
} else {
if snapshotErr == nil {
snapshotErr = err
}
snapshotFailures++
}
} }
} }
if snapshotFailures == len(c.EtcdHosts) {
log.Warnf(ctx, "[etcd] Failed to take snapshot on all etcd hosts: %s", snapshotErr)
return fmt.Errorf("[etcd] Failed to take snapshot on all etcd hosts: %s", snapshotErr)
} else if snapshotFailures > 0 {
log.Warnf(ctx, "[etcd] Failed to take snapshot on %s etcd hosts", snapshotFailures)
} else {
log.Infof(ctx, "[etcd] Finished saving snapshot [%s] on all etcd hosts", snapshotName)
}
if c.Services.Etcd.BackupConfig.S3BackupConfig == nil {
return nil
}
if s3UploadFailures >= len(c.EtcdHosts)-snapshotFailures {
log.Warnf(ctx, "[etcd] Failed to upload etcd snapshot file to s3 on all etcd hosts")
return fmt.Errorf("[etcd] Failed to upload etcd snapshot file to s3 on all etcd hosts")
} else if s3UploadFailures > 0 {
log.Warnf(ctx, "[etcd] Failed to upload etcd snapshot file to s3 on %s etcd hosts", s3UploadFailures)
} else {
log.Infof(ctx, "[etcd] Finished uploading etcd snapshot file to s3 on all etcd hosts")
}
return nil return nil
} }
@@ -112,13 +149,28 @@ func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error
var backupServer *hosts.Host var backupServer *hosts.Host
backupImage := c.getBackupImage() backupImage := c.getBackupImage()
var errors []error var errors []error
if c.Services.Etcd.BackupConfig == nil || // legacy rke local backup // s3 backup case
(c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig == nil) { // rancher local backup if c.Services.Etcd.BackupConfig != nil &&
c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
log.Infof(ctx, "[etcd] etcd s3 backup configuration found, will use s3 as source")
downloadFailed := false
for _, host := range c.EtcdHosts {
if err := services.DownloadEtcdSnapshotFromS3(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotPath, c.Services.Etcd, c.Version); err != nil {
log.Warnf(ctx, "failed to download snapshot [%s] from s3 on host [%s]: %v", snapshotPath, host.Address, err)
downloadFailed = true
break
}
}
backupReady = !downloadFailed
}
// legacy rke local backup or rancher local backup
if !backupReady {
if c.Services.Etcd.BackupConfig == nil { if c.Services.Etcd.BackupConfig == nil {
log.Infof(ctx, "[etcd] No etcd snapshot configuration found, will use local as source") log.Infof(ctx, "[etcd] No etcd snapshot configuration found, will use local as source")
} } else if c.Services.Etcd.BackupConfig.S3BackupConfig == nil {
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig == nil {
log.Infof(ctx, "[etcd] etcd snapshot configuration found and no s3 backup configuration found, will use local as source") log.Infof(ctx, "[etcd] etcd snapshot configuration found and no s3 backup configuration found, will use local as source")
} else {
log.Warnf(ctx, "[etcd] etcd snapshot configuration found and s3 backup configuration failed, falling back to use local as source")
} }
// stop etcd on all etcd nodes, we need this because we start the backup server on the same port // stop etcd on all etcd nodes, we need this because we start the backup server on the same port
for _, host := range c.EtcdHosts { for _, host := range c.EtcdHosts {
@@ -159,17 +211,6 @@ func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error
backupReady = true backupReady = true
} }
// s3 backup case
if c.Services.Etcd.BackupConfig != nil &&
c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
log.Infof(ctx, "[etcd] etcd s3 backup configuration found, will use s3 as source")
for _, host := range c.EtcdHosts {
if err := services.DownloadEtcdSnapshotFromS3(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotPath, c.Services.Etcd, c.Version); err != nil {
return err
}
}
backupReady = true
}
if !backupReady { if !backupReady {
return fmt.Errorf("failed to prepare backup for restore") return fmt.Errorf("failed to prepare backup for restore")
} }

View File

@@ -124,7 +124,7 @@ func DoRunOnetimeContainer(ctx context.Context, dClient *client.Client, imageCfg
} }
log.Infof(ctx, "Successfully started [%s] container on host [%s]", containerName, hostname) log.Infof(ctx, "Successfully started [%s] container on host [%s]", containerName, hostname)
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname) log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
exitCode, err := WaitForContainer(ctx, dClient, hostname, containerName) exitCode, err := WaitForContainer(ctx, dClient, hostname, containerName, true)
if err != nil { if err != nil {
return fmt.Errorf("Container [%s] did not complete in time on host [%s]", containerName, hostname) return fmt.Errorf("Container [%s] did not complete in time on host [%s]", containerName, hostname)
} }
@@ -164,11 +164,11 @@ func DoRollingUpdateContainer(ctx context.Context, dClient *client.Client, image
return fmt.Errorf("[%s] Failed rolling update of container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname) return fmt.Errorf("[%s] Failed rolling update of container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname)
} }
logrus.Debugf("[%s] Checking for deployed [%s]", plane, containerName) logrus.Debugf("[%s] Checking for deployed [%s]", plane, containerName)
isRunning, err := IsContainerRunning(ctx, dClient, hostname, containerName, false) exists, err := DoesContainerExist(ctx, dClient, hostname, containerName, false)
if err != nil { if err != nil {
return err return err
} }
if !isRunning { if !exists {
logrus.Debugf("[%s] Container %s is not running on host [%s]", plane, containerName, hostname) logrus.Debugf("[%s] Container %s is not running on host [%s]", plane, containerName, hostname)
return nil return nil
} }
@@ -217,30 +217,52 @@ func DoRemoveContainer(ctx context.Context, dClient *client.Client, containerNam
return nil return nil
} }
func IsContainerRunning(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (bool, error) { func FindContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (*types.Container, error) {
if dClient == nil { if dClient == nil {
return false, fmt.Errorf("Failed to check if container is running: docker client is nil for container [%s] on host [%s]", containerName, hostname) return nil, fmt.Errorf("Failed to find container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
var containers []types.Container var containers []types.Container
var err error var err error
for i := 1; i <= RetryCount; i++ { for i := 1; i <= RetryCount; i++ {
logrus.Infof("Checking if container [%s] is running on host [%s], try #%d", containerName, hostname, i) logrus.Infof("Finding container [%s] on host [%s], try #%d", containerName, hostname, i)
containers, err = dClient.ContainerList(ctx, types.ContainerListOptions{All: all}) containers, err = dClient.ContainerList(ctx, types.ContainerListOptions{All: all})
if err != nil { if err != nil {
logrus.Warnf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err) logrus.Warnf("Error finding container [%s] exists on host [%s]: %v", containerName, hostname, err)
continue continue
} }
break break
} }
if err != nil { if err != nil {
return false, fmt.Errorf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err) return nil, fmt.Errorf("Error checking if container [%s] exists on host [%s]: %v", containerName, hostname, err)
} }
for _, container := range containers { for _, container := range containers {
if len(container.Names) != 0 && container.Names[0] == "/"+containerName { if len(container.Names) != 0 && container.Names[0] == "/"+containerName {
return true, nil return &container, nil
} }
} }
return false, nil return nil, nil
}
// DoesContainerExist reports whether FindContainer returns a container named
// containerName on the given host. all is forwarded to FindContainer
// unchanged.
func DoesContainerExist(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (bool, error) {
	if dClient == nil {
		return false, fmt.Errorf("Failed to check if container exists: docker client is nil for container [%s] on host [%s]", containerName, hostname)
	}
	container, err := FindContainer(ctx, dClient, hostname, containerName, all)
	if err != nil {
		// FindContainer's error already names the container and the host, so
		// it is returned as-is instead of being wrapped in a second message
		// that wrongly said "is running".
		return false, err
	}
	return container != nil, nil
}
func IsContainerRunning(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (bool, error) {
if dClient == nil {
return false, fmt.Errorf("Failed to check if container is running: docker client is nil for container [%s] on host [%s]", containerName, hostname)
}
container, err := FindContainer(ctx, dClient, hostname, containerName, all)
if err != nil {
return false, fmt.Errorf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err)
}
return container != nil && container.State == "running", nil
} }
func localImageExists(ctx context.Context, dClient *client.Client, hostname string, containerImage string) error { func localImageExists(ctx context.Context, dClient *client.Client, hostname string, containerImage string) error {
@@ -476,7 +498,7 @@ func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname s
return fmt.Errorf("Failed to stop and rename container: docker client is nil for container [%s] on host [%s]", oldContainerName, hostname) return fmt.Errorf("Failed to stop and rename container: docker client is nil for container [%s] on host [%s]", oldContainerName, hostname)
} }
// make sure we don't have an old old-container from a previous broken update // make sure we don't have an old old-container from a previous broken update
exists, err := IsContainerRunning(ctx, dClient, hostname, newContainerName, true) exists, err := DoesContainerExist(ctx, dClient, hostname, newContainerName, true)
if err != nil { if err != nil {
return err return err
} }
@@ -488,14 +510,14 @@ func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname s
if err := StopContainer(ctx, dClient, hostname, oldContainerName); err != nil { if err := StopContainer(ctx, dClient, hostname, oldContainerName); err != nil {
return err return err
} }
if _, err := WaitForContainer(ctx, dClient, hostname, oldContainerName); err != nil { if _, err := WaitForContainer(ctx, dClient, hostname, oldContainerName, true); err != nil {
return err return err
} }
return RenameContainer(ctx, dClient, hostname, oldContainerName, newContainerName) return RenameContainer(ctx, dClient, hostname, oldContainerName, newContainerName)
} }
func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) (int64, error) { func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string, noisy bool) (int64, error) {
if dClient == nil { if dClient == nil {
return 1, fmt.Errorf("Failed waiting for container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return 1, fmt.Errorf("Failed waiting for container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
@@ -504,8 +526,9 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri
if v, ok := ctx.Value(WaitTimeoutContextKey).(int); ok && v > 0 { if v, ok := ctx.Value(WaitTimeoutContextKey).(int); ok && v > 0 {
containerTimeout = v containerTimeout = v
} }
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
var lastStdout, lastStderr string
for retries := 0; retries < containerTimeout; retries++ { for retries := 0; retries < containerTimeout; retries++ {
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
container, err := InspectContainer(ctx, dClient, hostname, containerName) container, err := InspectContainer(ctx, dClient, hostname, containerName)
if err != nil { if err != nil {
return 1, fmt.Errorf("Could not inspect container [%s] on host [%s]: %s", containerName, hostname, err) return 1, fmt.Errorf("Could not inspect container [%s] on host [%s]: %s", containerName, hostname, err)
@@ -515,8 +538,12 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri
if err != nil { if err != nil {
logrus.Warnf("Failed to get container logs from container [%s] on host [%s]: %v", containerName, hostname, err) logrus.Warnf("Failed to get container logs from container [%s] on host [%s]: %v", containerName, hostname, err)
} }
if noisy || lastStdout != stdout || lastStderr != stderr {
log.Infof(ctx, "Container [%s] is still running on host [%s]: stderr: [%s], stdout: [%s]", containerName, hostname, stderr, stdout)
lastStdout = stdout
lastStderr = stderr
}
log.Infof(ctx, "Container [%s] is still running on host [%s]: stderr: [%s], stdout: [%s]", containerName, hostname, stderr, stdout)
time.Sleep(1 * time.Second) time.Sleep(1 * time.Second)
continue continue
} }
@@ -789,11 +816,11 @@ func DoRestartContainer(ctx context.Context, dClient *client.Client, containerNa
return nil return nil
} }
func GetContainerOutput(ctx context.Context, dClient *client.Client, containerName, hostname string) (int64, string, string, error) { func GetContainerOutput(ctx context.Context, dClient *client.Client, containerName, hostname string, noisy bool) (int64, string, string, error) {
if dClient == nil { if dClient == nil {
return 1, "", "", fmt.Errorf("Failed to get container output: docker client is nil for container [%s] on host [%s]", containerName, hostname) return 1, "", "", fmt.Errorf("Failed to get container output: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
status, err := WaitForContainer(ctx, dClient, hostname, containerName) status, err := WaitForContainer(ctx, dClient, hostname, containerName, noisy)
if err != nil { if err != nil {
return 1, "", "", err return 1, "", "", err
} }

View File

@@ -135,7 +135,7 @@ func (h *Host) CleanUp(ctx context.Context, toCleanPaths []string, cleanerImage
return err return err
} }
if _, err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName); err != nil { if _, err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName, true); err != nil {
return err return err
} }

View File

@@ -116,7 +116,7 @@ func DeployStateOnPlaneHost(ctx context.Context, host *hosts.Host, stateDownload
logrus.Warnf("[state] Error during copying state file [%s] to node [%s]: %v", stateFilePath, host.Address, err) logrus.Warnf("[state] Error during copying state file [%s] to node [%s]: %v", stateFilePath, host.Address, err)
} }
if _, err := docker.WaitForContainer(ctx, host.DClient, host.Address, StateDeployerContainerName); err != nil { if _, err := docker.WaitForContainer(ctx, host.DClient, host.Address, StateDeployerContainerName, true); err != nil {
return err return err
} }
@@ -125,12 +125,11 @@ func DeployStateOnPlaneHost(ctx context.Context, host *hosts.Host, stateDownload
func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string, certDownloaderImage string, prsMap map[string]v3.PrivateRegistry, k8sVersion string) error { func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string, certDownloaderImage string, prsMap map[string]v3.PrivateRegistry, k8sVersion string) error {
// remove existing container. Only way it's still here is if previous deployment failed // remove existing container. Only way it's still here is if previous deployment failed
isRunning := false exists, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, CrtDownloaderContainer, true)
isRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, CrtDownloaderContainer, true)
if err != nil { if err != nil {
return err return err
} }
if isRunning { if exists {
if err := docker.RemoveContainer(ctx, host.DClient, host.Address, CrtDownloaderContainer); err != nil { if err := docker.RemoveContainer(ctx, host.DClient, host.Address, CrtDownloaderContainer); err != nil {
return err return err
} }
@@ -188,7 +187,7 @@ func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string,
} }
logrus.Debugf("[certificates] Successfully started Certificate deployer container: %s", CrtDownloaderContainer) logrus.Debugf("[certificates] Successfully started Certificate deployer container: %s", CrtDownloaderContainer)
for { for {
isDeployerRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, CrtDownloaderContainer, false) isDeployerRunning, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, CrtDownloaderContainer, false)
if err != nil { if err != nil {
return err return err
} }
@@ -331,11 +330,11 @@ func FetchFileFromHost(ctx context.Context, filePath, image string, host *hosts.
Binds: Binds, Binds: Binds,
Privileged: true, Privileged: true,
} }
isRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, containerName, true) exists, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, containerName, true)
if err != nil { if err != nil {
return "", err return "", err
} }
if !isRunning { if !exists {
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, containerName, host.Address, state, prsMap); err != nil { if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, containerName, host.Address, state, prsMap); err != nil {
return "", err return "", err
} }

View File

@@ -131,7 +131,7 @@ func SaveBackupBundleOnHost(ctx context.Context, host *hosts.Host, alpineSystemI
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, BundleCertContainer, host.Address, "certificates", prsMap); err != nil { if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, BundleCertContainer, host.Address, "certificates", prsMap); err != nil {
return err return err
} }
status, err := docker.WaitForContainer(ctx, host.DClient, host.Address, BundleCertContainer) status, err := docker.WaitForContainer(ctx, host.DClient, host.Address, BundleCertContainer, true)
if err != nil { if err != nil {
return err return err
} }

View File

@@ -439,7 +439,6 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
if hosts.IsDockerSELinuxEnabled(etcdHost) { if hosts.IsDockerSELinuxEnabled(etcdHost) {
hostCfg.SecurityOpt = append(hostCfg.SecurityOpt, SELinuxLabel) hostCfg.SecurityOpt = append(hostCfg.SecurityOpt, SELinuxLabel)
} }
} }
hostCfg.Binds = binds hostCfg.Binds = binds
@@ -449,18 +448,30 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
if err := docker.DoRemoveContainer(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address); err != nil { if err := docker.DoRemoveContainer(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address); err != nil {
return err return err
} }
// If the etcd container is not running the snapshot will never succeed
log.Debugf(ctx, "[etcd] Checking if etcd is running on host [%s]", etcdHost.Address)
if running, err := docker.IsContainerRunning(ctx, etcdHost.DClient, etcdHost.Address, "etcd", true); err != nil {
return err
} else if !running {
return fmt.Errorf("etcd is not running on host [%s]", etcdHost.Address)
}
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotOnceContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil { if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotOnceContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
return err return err
} }
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address) status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address, false)
if status != 0 || err != nil { if status != 0 || err != nil {
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName); removeErr != nil { if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName); removeErr != nil {
log.Warnf(ctx, "[etcd] Failed to remove container [%s] on host [%s]: %v", removeErr, etcdHost.Address) log.Warnf(ctx, "[etcd] Failed to remove container [%s] on host [%s]: %v", EtcdSnapshotOnceContainerName, removeErr, etcdHost.Address)
} }
if err != nil { if err != nil {
return err return err
} }
return fmt.Errorf("[etcd] Failed to take one-time snapshot on host [%s], exit code [%d]: %v", etcdHost.Address, status, stderr) if strings.Contains(stderr, "failed to upload etcd snapshot file") {
return fmt.Errorf("failed to upload etcd snapshot file to s3 on host [%s], exit code [%d]: %v", etcdHost.Address, status, stderr)
}
return fmt.Errorf("failed to take one-time snapshot on host [%s], exit code [%d]: %v", etcdHost.Address, status, stderr)
} }
return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName) return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName)
@@ -618,7 +629,7 @@ func DownloadEtcdSnapshotFromS3(ctx context.Context, etcdHost *hosts.Host, prsMa
return err return err
} }
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address) status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address, true)
if status != 0 || err != nil { if status != 0 || err != nil {
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil { if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr) log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
@@ -689,7 +700,7 @@ func RestoreEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdRestoreContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil { if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdRestoreContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
return err return err
} }
status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName) status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName, false)
if err != nil { if err != nil {
return err return err
} }
@@ -779,7 +790,7 @@ func RunEtcdSnapshotRemove(ctx context.Context, etcdHost *hosts.Host, prsMap map
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotRemoveContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil { if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotRemoveContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
return err return err
} }
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotRemoveContainerName, etcdHost.Address) status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotRemoveContainerName, etcdHost.Address, true)
if status != 0 || err != nil { if status != 0 || err != nil {
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotRemoveContainerName); removeErr != nil { if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotRemoveContainerName); removeErr != nil {
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr) log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
@@ -831,7 +842,7 @@ func GetEtcdSnapshotChecksum(ctx context.Context, etcdHost *hosts.Host, prsMap m
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdChecksumContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil { if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdChecksumContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
return checksum, err return checksum, err
} }
if _, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdChecksumContainerName); err != nil { if _, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdChecksumContainerName, true); err != nil {
return checksum, err return checksum, err
} }
stderr, checksum, err = docker.GetContainerLogsStdoutStderr(ctx, etcdHost.DClient, EtcdChecksumContainerName, "1", false) stderr, checksum, err = docker.GetContainerLogsStdoutStderr(ctx, etcdHost.DClient, EtcdChecksumContainerName, "1", false)
@@ -999,7 +1010,7 @@ func DownloadEtcdSnapshotFromBackupServer(ctx context.Context, etcdHost *hosts.H
return err return err
} }
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address) status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address, true)
if status != 0 || err != nil { if status != 0 || err != nil {
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil { if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr) log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)

View File

@@ -60,13 +60,13 @@ const (
type RestartFunc func(context.Context, *hosts.Host) error type RestartFunc func(context.Context, *hosts.Host) error
func runSidekick(ctx context.Context, host *hosts.Host, prsMap map[string]v3.PrivateRegistry, sidecarProcess v3.Process, k8sVersion string) error { func runSidekick(ctx context.Context, host *hosts.Host, prsMap map[string]v3.PrivateRegistry, sidecarProcess v3.Process, k8sVersion string) error {
isRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, SidekickContainerName, true) exists, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, SidekickContainerName, true)
if err != nil { if err != nil {
return err return err
} }
imageCfg, hostCfg, _ := GetProcessConfig(sidecarProcess, host, k8sVersion) imageCfg, hostCfg, _ := GetProcessConfig(sidecarProcess, host, k8sVersion)
isUpgradable := false isUpgradable := false
if isRunning { if exists {
isUpgradable, err = docker.IsContainerUpgradable(ctx, host.DClient, imageCfg, hostCfg, SidekickContainerName, host.Address, SidekickServiceName) isUpgradable, err = docker.IsContainerUpgradable(ctx, host.DClient, imageCfg, hostCfg, SidekickContainerName, host.Address, SidekickServiceName)
if err != nil { if err != nil {
return err return err