mirror of
https://github.com/rancher/rke.git
synced 2025-09-09 11:01:40 +00:00
Added retry for snapshot and s3 upload, verify etcd running on host (#2952)
Added retry for snapshot and s3 upload, verify etcd running on host. Added option to quiet noisy container logs.
This commit is contained in:
@@ -557,7 +557,6 @@ func (c *Cluster) StoreAddonConfigMap(ctx context.Context, addonYaml string, add
|
|||||||
timeout := make(chan bool, 1)
|
timeout := make(chan bool, 1)
|
||||||
go func() {
|
go func() {
|
||||||
for {
|
for {
|
||||||
|
|
||||||
updated, err = k8s.UpdateConfigMap(kubeClient, []byte(addonYaml), addonName)
|
updated, err = k8s.UpdateConfigMap(kubeClient, []byte(addonYaml), addonName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
time.Sleep(time.Second * 5)
|
time.Sleep(time.Second * 5)
|
||||||
|
@@ -3,6 +3,7 @@ package cluster
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
@@ -17,16 +18,52 @@ import (
|
|||||||
|
|
||||||
func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
|
func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
|
||||||
backupImage := c.getBackupImage()
|
backupImage := c.getBackupImage()
|
||||||
for _, host := range c.EtcdHosts {
|
|
||||||
containerTimeout := DefaultEtcdBackupConfigTimeout
|
containerTimeout := DefaultEtcdBackupConfigTimeout
|
||||||
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
|
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
|
||||||
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
|
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// store first error message
|
||||||
|
var snapshotErr error
|
||||||
|
snapshotFailures := 0
|
||||||
|
s3UploadFailures := 0
|
||||||
|
|
||||||
|
for _, host := range c.EtcdHosts {
|
||||||
newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout)
|
newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout)
|
||||||
if err := services.RunEtcdSnapshotSave(newCtx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd, c.Version); err != nil {
|
if err := services.RunEtcdSnapshotSave(newCtx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd, c.Version); err != nil {
|
||||||
return err
|
if strings.Contains(err.Error(), "failed to upload etcd snapshot file to s3 on host") {
|
||||||
|
s3UploadFailures++
|
||||||
|
} else {
|
||||||
|
if snapshotErr == nil {
|
||||||
|
snapshotErr = err
|
||||||
|
}
|
||||||
|
snapshotFailures++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if snapshotFailures == len(c.EtcdHosts) {
|
||||||
|
log.Warnf(ctx, "[etcd] Failed to take snapshot on all etcd hosts: %s", snapshotErr)
|
||||||
|
return fmt.Errorf("[etcd] Failed to take snapshot on all etcd hosts: %s", snapshotErr)
|
||||||
|
} else if snapshotFailures > 0 {
|
||||||
|
log.Warnf(ctx, "[etcd] Failed to take snapshot on %s etcd hosts", snapshotFailures)
|
||||||
|
} else {
|
||||||
|
log.Infof(ctx, "[etcd] Finished saving snapshot [%s] on all etcd hosts", snapshotName)
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Services.Etcd.BackupConfig.S3BackupConfig == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if s3UploadFailures >= len(c.EtcdHosts)-snapshotFailures {
|
||||||
|
log.Warnf(ctx, "[etcd] Failed to upload etcd snapshot file to s3 on all etcd hosts")
|
||||||
|
return fmt.Errorf("[etcd] Failed to upload etcd snapshot file to s3 on all etcd hosts")
|
||||||
|
} else if s3UploadFailures > 0 {
|
||||||
|
log.Warnf(ctx, "[etcd] Failed to upload etcd snapshot file to s3 on %s etcd hosts", s3UploadFailures)
|
||||||
|
} else {
|
||||||
|
log.Infof(ctx, "[etcd] Finished uploading etcd snapshot file to s3 on all etcd hosts")
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -112,13 +149,28 @@ func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error
|
|||||||
var backupServer *hosts.Host
|
var backupServer *hosts.Host
|
||||||
backupImage := c.getBackupImage()
|
backupImage := c.getBackupImage()
|
||||||
var errors []error
|
var errors []error
|
||||||
if c.Services.Etcd.BackupConfig == nil || // legacy rke local backup
|
// s3 backup case
|
||||||
(c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig == nil) { // rancher local backup
|
if c.Services.Etcd.BackupConfig != nil &&
|
||||||
|
c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
|
||||||
|
log.Infof(ctx, "[etcd] etcd s3 backup configuration found, will use s3 as source")
|
||||||
|
downloadFailed := false
|
||||||
|
for _, host := range c.EtcdHosts {
|
||||||
|
if err := services.DownloadEtcdSnapshotFromS3(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotPath, c.Services.Etcd, c.Version); err != nil {
|
||||||
|
log.Warnf(ctx, "failed to download snapshot [%s] from s3 on host [%s]: %v", snapshotPath, host.Address, err)
|
||||||
|
downloadFailed = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
backupReady = !downloadFailed
|
||||||
|
}
|
||||||
|
// legacy rke local backup or rancher local backup
|
||||||
|
if !backupReady {
|
||||||
if c.Services.Etcd.BackupConfig == nil {
|
if c.Services.Etcd.BackupConfig == nil {
|
||||||
log.Infof(ctx, "[etcd] No etcd snapshot configuration found, will use local as source")
|
log.Infof(ctx, "[etcd] No etcd snapshot configuration found, will use local as source")
|
||||||
}
|
} else if c.Services.Etcd.BackupConfig.S3BackupConfig == nil {
|
||||||
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.S3BackupConfig == nil {
|
|
||||||
log.Infof(ctx, "[etcd] etcd snapshot configuration found and no s3 backup configuration found, will use local as source")
|
log.Infof(ctx, "[etcd] etcd snapshot configuration found and no s3 backup configuration found, will use local as source")
|
||||||
|
} else {
|
||||||
|
log.Warnf(ctx, "[etcd] etcd snapshot configuration found and s3 backup configuration failed, falling back to use local as source")
|
||||||
}
|
}
|
||||||
// stop etcd on all etcd nodes, we need this because we start the backup server on the same port
|
// stop etcd on all etcd nodes, we need this because we start the backup server on the same port
|
||||||
for _, host := range c.EtcdHosts {
|
for _, host := range c.EtcdHosts {
|
||||||
@@ -159,17 +211,6 @@ func (c *Cluster) PrepareBackup(ctx context.Context, snapshotPath string) error
|
|||||||
backupReady = true
|
backupReady = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// s3 backup case
|
|
||||||
if c.Services.Etcd.BackupConfig != nil &&
|
|
||||||
c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
|
|
||||||
log.Infof(ctx, "[etcd] etcd s3 backup configuration found, will use s3 as source")
|
|
||||||
for _, host := range c.EtcdHosts {
|
|
||||||
if err := services.DownloadEtcdSnapshotFromS3(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotPath, c.Services.Etcd, c.Version); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
backupReady = true
|
|
||||||
}
|
|
||||||
if !backupReady {
|
if !backupReady {
|
||||||
return fmt.Errorf("failed to prepare backup for restore")
|
return fmt.Errorf("failed to prepare backup for restore")
|
||||||
}
|
}
|
||||||
|
@@ -124,7 +124,7 @@ func DoRunOnetimeContainer(ctx context.Context, dClient *client.Client, imageCfg
|
|||||||
}
|
}
|
||||||
log.Infof(ctx, "Successfully started [%s] container on host [%s]", containerName, hostname)
|
log.Infof(ctx, "Successfully started [%s] container on host [%s]", containerName, hostname)
|
||||||
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
|
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
|
||||||
exitCode, err := WaitForContainer(ctx, dClient, hostname, containerName)
|
exitCode, err := WaitForContainer(ctx, dClient, hostname, containerName, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Container [%s] did not complete in time on host [%s]", containerName, hostname)
|
return fmt.Errorf("Container [%s] did not complete in time on host [%s]", containerName, hostname)
|
||||||
}
|
}
|
||||||
@@ -164,11 +164,11 @@ func DoRollingUpdateContainer(ctx context.Context, dClient *client.Client, image
|
|||||||
return fmt.Errorf("[%s] Failed rolling update of container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname)
|
return fmt.Errorf("[%s] Failed rolling update of container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname)
|
||||||
}
|
}
|
||||||
logrus.Debugf("[%s] Checking for deployed [%s]", plane, containerName)
|
logrus.Debugf("[%s] Checking for deployed [%s]", plane, containerName)
|
||||||
isRunning, err := IsContainerRunning(ctx, dClient, hostname, containerName, false)
|
exists, err := DoesContainerExist(ctx, dClient, hostname, containerName, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !isRunning {
|
if !exists {
|
||||||
logrus.Debugf("[%s] Container %s is not running on host [%s]", plane, containerName, hostname)
|
logrus.Debugf("[%s] Container %s is not running on host [%s]", plane, containerName, hostname)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -217,30 +217,52 @@ func DoRemoveContainer(ctx context.Context, dClient *client.Client, containerNam
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func IsContainerRunning(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (bool, error) {
|
func FindContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (*types.Container, error) {
|
||||||
if dClient == nil {
|
if dClient == nil {
|
||||||
return false, fmt.Errorf("Failed to check if container is running: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
return nil, fmt.Errorf("Failed to find container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
||||||
}
|
}
|
||||||
var containers []types.Container
|
var containers []types.Container
|
||||||
var err error
|
var err error
|
||||||
for i := 1; i <= RetryCount; i++ {
|
for i := 1; i <= RetryCount; i++ {
|
||||||
logrus.Infof("Checking if container [%s] is running on host [%s], try #%d", containerName, hostname, i)
|
logrus.Infof("Finding container [%s] on host [%s], try #%d", containerName, hostname, i)
|
||||||
containers, err = dClient.ContainerList(ctx, types.ContainerListOptions{All: all})
|
containers, err = dClient.ContainerList(ctx, types.ContainerListOptions{All: all})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Warnf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err)
|
logrus.Warnf("Error finding container [%s] exists on host [%s]: %v", containerName, hostname, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err)
|
return nil, fmt.Errorf("Error checking if container [%s] exists on host [%s]: %v", containerName, hostname, err)
|
||||||
}
|
}
|
||||||
for _, container := range containers {
|
for _, container := range containers {
|
||||||
if len(container.Names) != 0 && container.Names[0] == "/"+containerName {
|
if len(container.Names) != 0 && container.Names[0] == "/"+containerName {
|
||||||
return true, nil
|
return &container, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false, nil
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func DoesContainerExist(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (bool, error) {
|
||||||
|
if dClient == nil {
|
||||||
|
return false, fmt.Errorf("Failed to check if container exists: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
||||||
|
}
|
||||||
|
container, err := FindContainer(ctx, dClient, hostname, containerName, all)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err)
|
||||||
|
}
|
||||||
|
return container != nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func IsContainerRunning(ctx context.Context, dClient *client.Client, hostname string, containerName string, all bool) (bool, error) {
|
||||||
|
if dClient == nil {
|
||||||
|
return false, fmt.Errorf("Failed to check if container is running: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
||||||
|
}
|
||||||
|
container, err := FindContainer(ctx, dClient, hostname, containerName, all)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err)
|
||||||
|
}
|
||||||
|
return container != nil && container.State == "running", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func localImageExists(ctx context.Context, dClient *client.Client, hostname string, containerImage string) error {
|
func localImageExists(ctx context.Context, dClient *client.Client, hostname string, containerImage string) error {
|
||||||
@@ -476,7 +498,7 @@ func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname s
|
|||||||
return fmt.Errorf("Failed to stop and rename container: docker client is nil for container [%s] on host [%s]", oldContainerName, hostname)
|
return fmt.Errorf("Failed to stop and rename container: docker client is nil for container [%s] on host [%s]", oldContainerName, hostname)
|
||||||
}
|
}
|
||||||
// make sure we don't have an old old-container from a previous broken update
|
// make sure we don't have an old old-container from a previous broken update
|
||||||
exists, err := IsContainerRunning(ctx, dClient, hostname, newContainerName, true)
|
exists, err := DoesContainerExist(ctx, dClient, hostname, newContainerName, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -488,14 +510,14 @@ func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname s
|
|||||||
if err := StopContainer(ctx, dClient, hostname, oldContainerName); err != nil {
|
if err := StopContainer(ctx, dClient, hostname, oldContainerName); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if _, err := WaitForContainer(ctx, dClient, hostname, oldContainerName); err != nil {
|
if _, err := WaitForContainer(ctx, dClient, hostname, oldContainerName, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return RenameContainer(ctx, dClient, hostname, oldContainerName, newContainerName)
|
return RenameContainer(ctx, dClient, hostname, oldContainerName, newContainerName)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) (int64, error) {
|
func WaitForContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string, noisy bool) (int64, error) {
|
||||||
if dClient == nil {
|
if dClient == nil {
|
||||||
return 1, fmt.Errorf("Failed waiting for container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
return 1, fmt.Errorf("Failed waiting for container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
||||||
}
|
}
|
||||||
@@ -504,8 +526,9 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri
|
|||||||
if v, ok := ctx.Value(WaitTimeoutContextKey).(int); ok && v > 0 {
|
if v, ok := ctx.Value(WaitTimeoutContextKey).(int); ok && v > 0 {
|
||||||
containerTimeout = v
|
containerTimeout = v
|
||||||
}
|
}
|
||||||
for retries := 0; retries < containerTimeout; retries++ {
|
|
||||||
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
|
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
|
||||||
|
var lastStdout, lastStderr string
|
||||||
|
for retries := 0; retries < containerTimeout; retries++ {
|
||||||
container, err := InspectContainer(ctx, dClient, hostname, containerName)
|
container, err := InspectContainer(ctx, dClient, hostname, containerName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 1, fmt.Errorf("Could not inspect container [%s] on host [%s]: %s", containerName, hostname, err)
|
return 1, fmt.Errorf("Could not inspect container [%s] on host [%s]: %s", containerName, hostname, err)
|
||||||
@@ -515,8 +538,12 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Warnf("Failed to get container logs from container [%s] on host [%s]: %v", containerName, hostname, err)
|
logrus.Warnf("Failed to get container logs from container [%s] on host [%s]: %v", containerName, hostname, err)
|
||||||
}
|
}
|
||||||
|
if noisy || lastStdout != stdout || lastStderr != stderr {
|
||||||
log.Infof(ctx, "Container [%s] is still running on host [%s]: stderr: [%s], stdout: [%s]", containerName, hostname, stderr, stdout)
|
log.Infof(ctx, "Container [%s] is still running on host [%s]: stderr: [%s], stdout: [%s]", containerName, hostname, stderr, stdout)
|
||||||
|
lastStdout = stdout
|
||||||
|
lastStderr = stderr
|
||||||
|
}
|
||||||
|
|
||||||
time.Sleep(1 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -789,11 +816,11 @@ func DoRestartContainer(ctx context.Context, dClient *client.Client, containerNa
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetContainerOutput(ctx context.Context, dClient *client.Client, containerName, hostname string) (int64, string, string, error) {
|
func GetContainerOutput(ctx context.Context, dClient *client.Client, containerName, hostname string, noisy bool) (int64, string, string, error) {
|
||||||
if dClient == nil {
|
if dClient == nil {
|
||||||
return 1, "", "", fmt.Errorf("Failed to get container output: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
return 1, "", "", fmt.Errorf("Failed to get container output: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
||||||
}
|
}
|
||||||
status, err := WaitForContainer(ctx, dClient, hostname, containerName)
|
status, err := WaitForContainer(ctx, dClient, hostname, containerName, noisy)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 1, "", "", err
|
return 1, "", "", err
|
||||||
}
|
}
|
||||||
|
@@ -135,7 +135,7 @@ func (h *Host) CleanUp(ctx context.Context, toCleanPaths []string, cleanerImage
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName); err != nil {
|
if _, err := docker.WaitForContainer(ctx, h.DClient, h.Address, CleanerContainerName, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -116,7 +116,7 @@ func DeployStateOnPlaneHost(ctx context.Context, host *hosts.Host, stateDownload
|
|||||||
logrus.Warnf("[state] Error during copying state file [%s] to node [%s]: %v", stateFilePath, host.Address, err)
|
logrus.Warnf("[state] Error during copying state file [%s] to node [%s]: %v", stateFilePath, host.Address, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := docker.WaitForContainer(ctx, host.DClient, host.Address, StateDeployerContainerName); err != nil {
|
if _, err := docker.WaitForContainer(ctx, host.DClient, host.Address, StateDeployerContainerName, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,12 +125,11 @@ func DeployStateOnPlaneHost(ctx context.Context, host *hosts.Host, stateDownload
|
|||||||
|
|
||||||
func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string, certDownloaderImage string, prsMap map[string]v3.PrivateRegistry, k8sVersion string) error {
|
func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string, certDownloaderImage string, prsMap map[string]v3.PrivateRegistry, k8sVersion string) error {
|
||||||
// remove existing container. Only way it's still here is if previous deployment failed
|
// remove existing container. Only way it's still here is if previous deployment failed
|
||||||
isRunning := false
|
exists, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, CrtDownloaderContainer, true)
|
||||||
isRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, CrtDownloaderContainer, true)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if isRunning {
|
if exists {
|
||||||
if err := docker.RemoveContainer(ctx, host.DClient, host.Address, CrtDownloaderContainer); err != nil {
|
if err := docker.RemoveContainer(ctx, host.DClient, host.Address, CrtDownloaderContainer); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -188,7 +187,7 @@ func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string,
|
|||||||
}
|
}
|
||||||
logrus.Debugf("[certificates] Successfully started Certificate deployer container: %s", CrtDownloaderContainer)
|
logrus.Debugf("[certificates] Successfully started Certificate deployer container: %s", CrtDownloaderContainer)
|
||||||
for {
|
for {
|
||||||
isDeployerRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, CrtDownloaderContainer, false)
|
isDeployerRunning, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, CrtDownloaderContainer, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -331,11 +330,11 @@ func FetchFileFromHost(ctx context.Context, filePath, image string, host *hosts.
|
|||||||
Binds: Binds,
|
Binds: Binds,
|
||||||
Privileged: true,
|
Privileged: true,
|
||||||
}
|
}
|
||||||
isRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, containerName, true)
|
exists, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, containerName, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
if !isRunning {
|
if !exists {
|
||||||
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, containerName, host.Address, state, prsMap); err != nil {
|
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, containerName, host.Address, state, prsMap); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
@@ -131,7 +131,7 @@ func SaveBackupBundleOnHost(ctx context.Context, host *hosts.Host, alpineSystemI
|
|||||||
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, BundleCertContainer, host.Address, "certificates", prsMap); err != nil {
|
if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, BundleCertContainer, host.Address, "certificates", prsMap); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
status, err := docker.WaitForContainer(ctx, host.DClient, host.Address, BundleCertContainer)
|
status, err := docker.WaitForContainer(ctx, host.DClient, host.Address, BundleCertContainer, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@@ -439,7 +439,6 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
|
|||||||
if hosts.IsDockerSELinuxEnabled(etcdHost) {
|
if hosts.IsDockerSELinuxEnabled(etcdHost) {
|
||||||
hostCfg.SecurityOpt = append(hostCfg.SecurityOpt, SELinuxLabel)
|
hostCfg.SecurityOpt = append(hostCfg.SecurityOpt, SELinuxLabel)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
hostCfg.Binds = binds
|
hostCfg.Binds = binds
|
||||||
|
|
||||||
@@ -449,18 +448,30 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
|
|||||||
if err := docker.DoRemoveContainer(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address); err != nil {
|
if err := docker.DoRemoveContainer(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the etcd container is not running the snapshot will never succeed
|
||||||
|
log.Debugf(ctx, "[etcd] Checking if etcd is running on host [%s]", etcdHost.Address)
|
||||||
|
if running, err := docker.IsContainerRunning(ctx, etcdHost.DClient, etcdHost.Address, "etcd", true); err != nil {
|
||||||
|
return err
|
||||||
|
} else if !running {
|
||||||
|
return fmt.Errorf("etcd is not running on host [%s]", etcdHost.Address)
|
||||||
|
}
|
||||||
|
|
||||||
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotOnceContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotOnceContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address)
|
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotOnceContainerName, etcdHost.Address, false)
|
||||||
if status != 0 || err != nil {
|
if status != 0 || err != nil {
|
||||||
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName); removeErr != nil {
|
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName); removeErr != nil {
|
||||||
log.Warnf(ctx, "[etcd] Failed to remove container [%s] on host [%s]: %v", removeErr, etcdHost.Address)
|
log.Warnf(ctx, "[etcd] Failed to remove container [%s] on host [%s]: %v", EtcdSnapshotOnceContainerName, removeErr, etcdHost.Address)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return fmt.Errorf("[etcd] Failed to take one-time snapshot on host [%s], exit code [%d]: %v", etcdHost.Address, status, stderr)
|
if strings.Contains(stderr, "failed to upload etcd snapshot file") {
|
||||||
|
return fmt.Errorf("failed to upload etcd snapshot file to s3 on host [%s], exit code [%d]: %v", etcdHost.Address, status, stderr)
|
||||||
|
}
|
||||||
|
return fmt.Errorf("failed to take one-time snapshot on host [%s], exit code [%d]: %v", etcdHost.Address, status, stderr)
|
||||||
}
|
}
|
||||||
|
|
||||||
return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName)
|
return docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotOnceContainerName)
|
||||||
@@ -618,7 +629,7 @@ func DownloadEtcdSnapshotFromS3(ctx context.Context, etcdHost *hosts.Host, prsMa
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address)
|
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address, true)
|
||||||
if status != 0 || err != nil {
|
if status != 0 || err != nil {
|
||||||
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
|
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
|
||||||
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
|
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
|
||||||
@@ -689,7 +700,7 @@ func RestoreEtcdSnapshot(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
|
|||||||
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdRestoreContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdRestoreContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName)
|
status, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdRestoreContainerName, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -779,7 +790,7 @@ func RunEtcdSnapshotRemove(ctx context.Context, etcdHost *hosts.Host, prsMap map
|
|||||||
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotRemoveContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdSnapshotRemoveContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotRemoveContainerName, etcdHost.Address)
|
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdSnapshotRemoveContainerName, etcdHost.Address, true)
|
||||||
if status != 0 || err != nil {
|
if status != 0 || err != nil {
|
||||||
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotRemoveContainerName); removeErr != nil {
|
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdSnapshotRemoveContainerName); removeErr != nil {
|
||||||
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
|
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
|
||||||
@@ -831,7 +842,7 @@ func GetEtcdSnapshotChecksum(ctx context.Context, etcdHost *hosts.Host, prsMap m
|
|||||||
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdChecksumContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
if err := docker.DoRunContainer(ctx, etcdHost.DClient, imageCfg, hostCfg, EtcdChecksumContainerName, etcdHost.Address, ETCDRole, prsMap); err != nil {
|
||||||
return checksum, err
|
return checksum, err
|
||||||
}
|
}
|
||||||
if _, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdChecksumContainerName); err != nil {
|
if _, err := docker.WaitForContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdChecksumContainerName, true); err != nil {
|
||||||
return checksum, err
|
return checksum, err
|
||||||
}
|
}
|
||||||
stderr, checksum, err = docker.GetContainerLogsStdoutStderr(ctx, etcdHost.DClient, EtcdChecksumContainerName, "1", false)
|
stderr, checksum, err = docker.GetContainerLogsStdoutStderr(ctx, etcdHost.DClient, EtcdChecksumContainerName, "1", false)
|
||||||
@@ -999,7 +1010,7 @@ func DownloadEtcdSnapshotFromBackupServer(ctx context.Context, etcdHost *hosts.H
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address)
|
status, _, stderr, err := docker.GetContainerOutput(ctx, etcdHost.DClient, EtcdDownloadBackupContainerName, etcdHost.Address, true)
|
||||||
if status != 0 || err != nil {
|
if status != 0 || err != nil {
|
||||||
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
|
if removeErr := docker.RemoveContainer(ctx, etcdHost.DClient, etcdHost.Address, EtcdDownloadBackupContainerName); removeErr != nil {
|
||||||
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
|
log.Warnf(ctx, "Failed to remove container [%s]: %v", removeErr)
|
||||||
|
@@ -60,13 +60,13 @@ const (
|
|||||||
type RestartFunc func(context.Context, *hosts.Host) error
|
type RestartFunc func(context.Context, *hosts.Host) error
|
||||||
|
|
||||||
func runSidekick(ctx context.Context, host *hosts.Host, prsMap map[string]v3.PrivateRegistry, sidecarProcess v3.Process, k8sVersion string) error {
|
func runSidekick(ctx context.Context, host *hosts.Host, prsMap map[string]v3.PrivateRegistry, sidecarProcess v3.Process, k8sVersion string) error {
|
||||||
isRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, SidekickContainerName, true)
|
exists, err := docker.DoesContainerExist(ctx, host.DClient, host.Address, SidekickContainerName, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
imageCfg, hostCfg, _ := GetProcessConfig(sidecarProcess, host, k8sVersion)
|
imageCfg, hostCfg, _ := GetProcessConfig(sidecarProcess, host, k8sVersion)
|
||||||
isUpgradable := false
|
isUpgradable := false
|
||||||
if isRunning {
|
if exists {
|
||||||
isUpgradable, err = docker.IsContainerUpgradable(ctx, host.DClient, imageCfg, hostCfg, SidekickContainerName, host.Address, SidekickServiceName)
|
isUpgradable, err = docker.IsContainerUpgradable(ctx, host.DClient, imageCfg, hostCfg, SidekickContainerName, host.Address, SidekickServiceName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
Reference in New Issue
Block a user