1
0
mirror of https://github.com/rancher/rke.git synced 2025-04-27 11:21:08 +00:00

Added etcd snapshot timeout parameter

This commit is contained in:
rawmind0 2021-01-22 18:35:13 +01:00
parent e395badf82
commit 1880404fc3
4 changed files with 28 additions and 4 deletions

View File

@ -54,6 +54,7 @@ const (
DefaultMonitoringProvider = "metrics-server"
DefaultEtcdBackupConfigIntervalHours = 12
DefaultEtcdBackupConfigRetention = 6
DefaultEtcdBackupConfigTimeout = docker.WaitTimeout
DefaultDNSProvider = "kube-dns"
K8sVersionCoreDNS = "1.14.0"
@ -336,6 +337,9 @@ func (c *Cluster) setClusterServicesDefaults() {
if c.Services.Etcd.BackupConfig.Retention == 0 {
c.Services.Etcd.BackupConfig.Retention = DefaultEtcdBackupConfigRetention
}
if c.Services.Etcd.BackupConfig.Timeout == 0 {
c.Services.Etcd.BackupConfig.Timeout = DefaultEtcdBackupConfigTimeout
}
}
if _, ok := c.Services.KubeAPI.ExtraArgs[KubeAPIArgAdmissionControlConfigFile]; !ok {

View File

@ -18,7 +18,12 @@ import (
func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
backupImage := c.getBackupImage()
for _, host := range c.EtcdHosts {
if err := services.RunEtcdSnapshotSave(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd); err != nil {
containerTimeout := DefaultEtcdBackupConfigTimeout
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
}
newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout)
if err := services.RunEtcdSnapshotSave(newCtx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd); err != nil {
return err
}
}
@ -175,7 +180,12 @@ func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string)
initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
backupImage := c.getBackupImage()
for _, host := range c.EtcdHosts {
if err := services.RestoreEtcdSnapshot(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupImage,
containerTimeout := DefaultEtcdBackupConfigTimeout
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
}
newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout)
if err := services.RestoreEtcdSnapshot(newCtx, host, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupImage,
snapshotPath, initCluster, c.Services.Etcd); err != nil {
return fmt.Errorf("[etcd] Failed to restore etcd snapshot: %v", err)
}

View File

@ -34,6 +34,10 @@ const (
StopTimeout = 5
// RetryCount is the amount of retries for Docker operations
RetryCount = 3
// WaitTimeout is the default time, in seconds, to wait for a container to exit
WaitTimeout = 300
// WaitTimeoutContextKey is the context key under which a custom wait timeout (an int, in seconds) is stored
WaitTimeoutContextKey = "wait_timeout"
)
type dockerConfig struct {
@ -492,8 +496,12 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri
if dClient == nil {
return 1, fmt.Errorf("Failed waiting for container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
}
// 5 minutes timeout, especially for transferring snapshots
for retries := 0; retries < 300; retries++ {
// Use the timeout stored in the context when one is set, otherwise fall back to the default; long waits matter especially when transferring snapshots
containerTimeout := WaitTimeout
if v, ok := ctx.Value(WaitTimeoutContextKey).(int); ok && v > 0 {
containerTimeout = v
}
for retries := 0; retries < containerTimeout; retries++ {
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
container, err := InspectContainer(ctx, dClient, hostname, containerName)
if err != nil {

View File

@ -21,6 +21,8 @@ type BackupConfig struct {
S3BackupConfig *S3BackupConfig `yaml:",omitempty" json:"s3BackupConfig"`
// replace special characters in snapshot names
SafeTimestamp bool `yaml:"safe_timestamp" json:"safeTimestamp,omitempty"`
// Backup execution timeout in seconds (defaults to 300)
Timeout int `yaml:"timeout" json:"timeout,omitempty" norman:"default=300"`
}
type S3BackupConfig struct {