From 1880404fc3e5b652ee92ff1bf4d05db9be060c2c Mon Sep 17 00:00:00 2001 From: rawmind0 Date: Fri, 22 Jan 2021 18:35:13 +0100 Subject: [PATCH] Added etcd snapshot timeout parameter --- cluster/defaults.go | 4 ++++ cluster/etcd.go | 14 ++++++++++++-- docker/docker.go | 12 ++++++++++-- types/backup_types.go | 2 ++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/cluster/defaults.go b/cluster/defaults.go index 52ea5672..6dc0ea31 100644 --- a/cluster/defaults.go +++ b/cluster/defaults.go @@ -54,6 +54,7 @@ const ( DefaultMonitoringProvider = "metrics-server" DefaultEtcdBackupConfigIntervalHours = 12 DefaultEtcdBackupConfigRetention = 6 + DefaultEtcdBackupConfigTimeout = docker.WaitTimeout DefaultDNSProvider = "kube-dns" K8sVersionCoreDNS = "1.14.0" @@ -336,6 +337,9 @@ func (c *Cluster) setClusterServicesDefaults() { if c.Services.Etcd.BackupConfig.Retention == 0 { c.Services.Etcd.BackupConfig.Retention = DefaultEtcdBackupConfigRetention } + if c.Services.Etcd.BackupConfig.Timeout == 0 { + c.Services.Etcd.BackupConfig.Timeout = DefaultEtcdBackupConfigTimeout + } } if _, ok := c.Services.KubeAPI.ExtraArgs[KubeAPIArgAdmissionControlConfigFile]; !ok { diff --git a/cluster/etcd.go b/cluster/etcd.go index a78a7cfd..cd45156d 100644 --- a/cluster/etcd.go +++ b/cluster/etcd.go @@ -18,7 +18,12 @@ import ( func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error { backupImage := c.getBackupImage() for _, host := range c.EtcdHosts { - if err := services.RunEtcdSnapshotSave(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd); err != nil { + containerTimeout := DefaultEtcdBackupConfigTimeout + if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 { + containerTimeout = c.Services.Etcd.BackupConfig.Timeout + } + newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout) + if err := services.RunEtcdSnapshotSave(newCtx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd); err != nil { return err } } @@ -175,7 +180,12 @@ func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string) initCluster := services.GetEtcdInitialCluster(c.EtcdHosts) backupImage := c.getBackupImage() for _, host := range c.EtcdHosts { - if err := services.RestoreEtcdSnapshot(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupImage, + containerTimeout := DefaultEtcdBackupConfigTimeout + if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 { + containerTimeout = c.Services.Etcd.BackupConfig.Timeout + } + newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout) + if err := services.RestoreEtcdSnapshot(newCtx, host, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupImage, snapshotPath, initCluster, c.Services.Etcd); err != nil { return fmt.Errorf("[etcd] Failed to restore etcd snapshot: %v", err) } diff --git a/docker/docker.go b/docker/docker.go index c7054dfc..fb2e89bf 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -34,6 +34,10 @@ const ( StopTimeout = 5 // RetryCount is the amount of retries for Docker operations RetryCount = 3 + // WaitTimeout in seconds + WaitTimeout = 300 + // WaitTimeoutContextKey name + WaitTimeoutContextKey = "wait_timeout" ) type dockerConfig struct { @@ -492,8 +496,12 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri if dClient == nil { return 1, fmt.Errorf("Failed waiting for container: docker client is nil for container [%s] on host [%s]", containerName, hostname) } - // 5 minutes timeout, especially for transferring snapshots - for retries := 0; retries < 300; retries++ { + // Set containerTimeout value from context or default. Especially for transferring snapshots + containerTimeout := WaitTimeout + if v, ok := ctx.Value(WaitTimeoutContextKey).(int); ok && v > 0 { + containerTimeout = v + } + for retries := 0; retries < containerTimeout; retries++ { log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname) container, err := InspectContainer(ctx, dClient, hostname, containerName) if err != nil { diff --git a/types/backup_types.go b/types/backup_types.go index 6e1456a6..ee2407a6 100644 --- a/types/backup_types.go +++ b/types/backup_types.go @@ -21,6 +21,8 @@ type BackupConfig struct { S3BackupConfig *S3BackupConfig `yaml:",omitempty" json:"s3BackupConfig"` // replace special characters in snapshot names SafeTimestamp bool `yaml:"safe_timestamp" json:"safeTimestamp,omitempty"` + // Backup execution timeout + Timeout int `yaml:"timeout" json:"timeout,omitempty" norman:"default=300"` } type S3BackupConfig struct {