mirror of
https://github.com/rancher/rke.git
synced 2025-04-27 11:21:08 +00:00
Added etcd snapshot timeout parameter
This commit is contained in:
parent
e395badf82
commit
1880404fc3
@ -54,6 +54,7 @@ const (
|
||||
DefaultMonitoringProvider = "metrics-server"
|
||||
DefaultEtcdBackupConfigIntervalHours = 12
|
||||
DefaultEtcdBackupConfigRetention = 6
|
||||
DefaultEtcdBackupConfigTimeout = docker.WaitTimeout
|
||||
|
||||
DefaultDNSProvider = "kube-dns"
|
||||
K8sVersionCoreDNS = "1.14.0"
|
||||
@ -336,6 +337,9 @@ func (c *Cluster) setClusterServicesDefaults() {
|
||||
if c.Services.Etcd.BackupConfig.Retention == 0 {
|
||||
c.Services.Etcd.BackupConfig.Retention = DefaultEtcdBackupConfigRetention
|
||||
}
|
||||
if c.Services.Etcd.BackupConfig.Timeout == 0 {
|
||||
c.Services.Etcd.BackupConfig.Timeout = DefaultEtcdBackupConfigTimeout
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := c.Services.KubeAPI.ExtraArgs[KubeAPIArgAdmissionControlConfigFile]; !ok {
|
||||
|
@ -18,7 +18,12 @@ import (
|
||||
func (c *Cluster) SnapshotEtcd(ctx context.Context, snapshotName string) error {
|
||||
backupImage := c.getBackupImage()
|
||||
for _, host := range c.EtcdHosts {
|
||||
if err := services.RunEtcdSnapshotSave(ctx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd); err != nil {
|
||||
containerTimeout := DefaultEtcdBackupConfigTimeout
|
||||
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
|
||||
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
|
||||
}
|
||||
newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout)
|
||||
if err := services.RunEtcdSnapshotSave(newCtx, host, c.PrivateRegistriesMap, backupImage, snapshotName, true, c.Services.Etcd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -175,7 +180,12 @@ func (c *Cluster) RestoreEtcdSnapshot(ctx context.Context, snapshotPath string)
|
||||
initCluster := services.GetEtcdInitialCluster(c.EtcdHosts)
|
||||
backupImage := c.getBackupImage()
|
||||
for _, host := range c.EtcdHosts {
|
||||
if err := services.RestoreEtcdSnapshot(ctx, host, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupImage,
|
||||
containerTimeout := DefaultEtcdBackupConfigTimeout
|
||||
if c.Services.Etcd.BackupConfig != nil && c.Services.Etcd.BackupConfig.Timeout > 0 {
|
||||
containerTimeout = c.Services.Etcd.BackupConfig.Timeout
|
||||
}
|
||||
newCtx := context.WithValue(ctx, docker.WaitTimeoutContextKey, containerTimeout)
|
||||
if err := services.RestoreEtcdSnapshot(newCtx, host, c.PrivateRegistriesMap, c.SystemImages.Etcd, backupImage,
|
||||
snapshotPath, initCluster, c.Services.Etcd); err != nil {
|
||||
return fmt.Errorf("[etcd] Failed to restore etcd snapshot: %v", err)
|
||||
}
|
||||
|
@ -34,6 +34,10 @@ const (
|
||||
StopTimeout = 5
|
||||
// RetryCount is the number of retries for Docker operations
|
||||
RetryCount = 3
|
||||
// WaitTimeout is the default timeout, in seconds, used when waiting for a container to exit
|
||||
WaitTimeout = 300
|
||||
// WaitTimeoutContextKey is the context key under which a custom wait timeout (an int, in seconds) is stored
|
||||
WaitTimeoutContextKey = "wait_timeout"
|
||||
)
|
||||
|
||||
type dockerConfig struct {
|
||||
@ -492,8 +496,12 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri
|
||||
if dClient == nil {
|
||||
return 1, fmt.Errorf("Failed waiting for container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
|
||||
}
|
||||
// 5 minutes timeout, especially for transferring snapshots
|
||||
for retries := 0; retries < 300; retries++ {
|
||||
// Use the timeout stored in the context when it is a positive int; otherwise fall back to the default WaitTimeout. A configurable timeout matters especially for transferring snapshots.
|
||||
containerTimeout := WaitTimeout
|
||||
if v, ok := ctx.Value(WaitTimeoutContextKey).(int); ok && v > 0 {
|
||||
containerTimeout = v
|
||||
}
|
||||
for retries := 0; retries < containerTimeout; retries++ {
|
||||
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
|
||||
container, err := InspectContainer(ctx, dClient, hostname, containerName)
|
||||
if err != nil {
|
||||
|
@ -21,6 +21,8 @@ type BackupConfig struct {
|
||||
S3BackupConfig *S3BackupConfig `yaml:",omitempty" json:"s3BackupConfig"`
|
||||
// Replace special characters in snapshot names.
|
||||
SafeTimestamp bool `yaml:"safe_timestamp" json:"safeTimestamp,omitempty"`
|
||||
// Backup execution timeout in seconds (defaults to 300)
|
||||
Timeout int `yaml:"timeout" json:"timeout,omitempty" norman:"default=300"`
|
||||
}
|
||||
|
||||
type S3BackupConfig struct {
|
||||
|
Loading…
Reference in New Issue
Block a user