1
0
mirror of https://github.com/rancher/rke.git synced 2025-06-24 22:42:23 +00:00

Add retry logic to Docker interactions

This commit is contained in:
Sebastiaan van Steenis 2019-06-11 11:39:45 +02:00 committed by Alena Prokharchyk
parent 60d36022f7
commit 173669c8a5
3 changed files with 186 additions and 91 deletions

View File

@ -31,6 +31,8 @@ const (
RestartTimeout = 5 RestartTimeout = 5
// StopTimeout in seconds // StopTimeout in seconds
StopTimeout = 5 StopTimeout = 5
// RetryCount is the amount of retries for Docker operations
RetryCount = 3
) )
var K8sDockerVersions = map[string][]string{ var K8sDockerVersions = map[string][]string{
@ -54,19 +56,19 @@ func DoRunContainer(ctx context.Context, dClient *client.Client, imageCfg *conta
if dClient == nil { if dClient == nil {
return fmt.Errorf("[%s] Failed to run container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname) return fmt.Errorf("[%s] Failed to run container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname)
} }
container, err := dClient.ContainerInspect(ctx, containerName) container, err := InspectContainer(ctx, dClient, hostname, containerName)
if err != nil { if err != nil {
if !client.IsErrNotFound(err) { if !client.IsErrNotFound(err) {
return err return err
} }
if err := UseLocalOrPull(ctx, dClient, hostname, imageCfg.Image, plane, prsMap); err != nil { if err := UseLocalOrPull(ctx, dClient, hostname, imageCfg.Image, plane, prsMap); err != nil {
return err return fmt.Errorf("Failed to pull image [%s] on host [%s]: %v", imageCfg.Image, hostname, err)
} }
resp, err := dClient.ContainerCreate(ctx, imageCfg, hostCfg, nil, containerName) _, err := CreateContainer(ctx, dClient, hostname, containerName, imageCfg, hostCfg)
if err != nil { if err != nil {
return fmt.Errorf("Failed to create [%s] container on host [%s]: %v", containerName, hostname, err) return fmt.Errorf("Failed to create [%s] container on host [%s]: %v", containerName, hostname, err)
} }
if err := dClient.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil { if err := StartContainer(ctx, dClient, hostname, containerName); err != nil {
return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err) return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err)
} }
log.Infof(ctx, "[%s] Successfully started [%s] container on host [%s]", plane, containerName, hostname) log.Infof(ctx, "[%s] Successfully started [%s] container on host [%s]", plane, containerName, hostname)
@ -77,9 +79,9 @@ func DoRunContainer(ctx context.Context, dClient *client.Client, imageCfg *conta
// check if container is in a restarting loop // check if container is in a restarting loop
if container.State.Restarting { if container.State.Restarting {
logrus.Debugf("[%s] Container [%s] is in a restarting loop [%s]", plane, containerName, hostname) logrus.Debugf("[%s] Container [%s] is in a restarting loop [%s]", plane, containerName, hostname)
restartTimeoutDuration := RestartTimeout * time.Second err = RestartContainer(ctx, dClient, hostname, containerName)
if err := dClient.ContainerRestart(ctx, container.ID, &restartTimeoutDuration); err != nil { if err != nil {
return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err) return err
} }
} }
logrus.Debugf("[%s] Container [%s] is already running on host [%s]", plane, containerName, hostname) logrus.Debugf("[%s] Container [%s] is already running on host [%s]", plane, containerName, hostname)
@ -94,8 +96,8 @@ func DoRunContainer(ctx context.Context, dClient *client.Client, imageCfg *conta
} }
// start if not running // start if not running
logrus.Debugf("[%s] Starting stopped container [%s] on host [%s]", plane, containerName, hostname) logrus.Infof("[%s] Starting stopped container [%s] on host [%s]", plane, containerName, hostname)
if err := dClient.ContainerStart(ctx, container.ID, types.ContainerStartOptions{}); err != nil { if err := StartContainer(ctx, dClient, hostname, containerName); err != nil {
return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err) return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err)
} }
log.Infof(ctx, "[%s] Successfully started [%s] container on host [%s]", plane, containerName, hostname) log.Infof(ctx, "[%s] Successfully started [%s] container on host [%s]", plane, containerName, hostname)
@ -106,7 +108,7 @@ func DoRunOnetimeContainer(ctx context.Context, dClient *client.Client, imageCfg
if dClient == nil { if dClient == nil {
return fmt.Errorf("[%s] Failed to run container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname) return fmt.Errorf("[%s] Failed to run container: docker client is nil for container [%s] on host [%s]", plane, containerName, hostname)
} }
_, err := dClient.ContainerInspect(ctx, containerName) _, err := InspectContainer(ctx, dClient, hostname, containerName)
if err != nil { if err != nil {
if !client.IsErrNotFound(err) { if !client.IsErrNotFound(err) {
return err return err
@ -114,11 +116,11 @@ func DoRunOnetimeContainer(ctx context.Context, dClient *client.Client, imageCfg
if err := UseLocalOrPull(ctx, dClient, hostname, imageCfg.Image, plane, prsMap); err != nil { if err := UseLocalOrPull(ctx, dClient, hostname, imageCfg.Image, plane, prsMap); err != nil {
return err return err
} }
resp, err := dClient.ContainerCreate(ctx, imageCfg, hostCfg, nil, containerName) _, err := CreateContainer(ctx, dClient, hostname, containerName, imageCfg, hostCfg)
if err != nil { if err != nil {
return fmt.Errorf("Failed to create [%s] container on host [%s]: %v", containerName, hostname, err) return fmt.Errorf("Failed to create [%s] container on host [%s]: %v", containerName, hostname, err)
} }
if err := dClient.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil { if err := StartContainer(ctx, dClient, hostname, containerName); err != nil {
return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err) return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err)
} }
log.Infof(ctx, "Successfully started [%s] container on host [%s]", containerName, hostname) log.Infof(ctx, "Successfully started [%s] container on host [%s]", containerName, hostname)
@ -182,8 +184,7 @@ func DoRemoveContainer(ctx context.Context, dClient *client.Client, containerNam
return fmt.Errorf("Failed to remove container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return fmt.Errorf("Failed to remove container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
logrus.Debugf("[remove/%s] Checking if container is running on host [%s]", containerName, hostname) logrus.Debugf("[remove/%s] Checking if container is running on host [%s]", containerName, hostname)
// not using the wrapper to check if the error is a NotFound error _, err := InspectContainer(ctx, dClient, hostname, containerName)
_, err := dClient.ContainerInspect(ctx, containerName)
if err != nil { if err != nil {
if client.IsErrNotFound(err) { if client.IsErrNotFound(err) {
logrus.Debugf("[remove/%s] Container doesn't exist on host [%s]", containerName, hostname) logrus.Debugf("[remove/%s] Container doesn't exist on host [%s]", containerName, hostname)
@ -204,11 +205,19 @@ func IsContainerRunning(ctx context.Context, dClient *client.Client, hostname st
if dClient == nil { if dClient == nil {
return false, fmt.Errorf("Failed to check if container is running: docker client is nil for container [%s] on host [%s]", containerName, hostname) return false, fmt.Errorf("Failed to check if container is running: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
logrus.Debugf("Checking if container [%s] is running on host [%s]", containerName, hostname) var containers []types.Container
containers, err := dClient.ContainerList(ctx, types.ContainerListOptions{All: all}) var err error
for i := 1; i <= RetryCount; i++ {
logrus.Infof("Checking if container [%s] is running on host [%s], try #%d", containerName, hostname, i)
containers, err = dClient.ContainerList(ctx, types.ContainerListOptions{All: all})
if err != nil {
logrus.Warnf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err)
continue
}
break
}
if err != nil { if err != nil {
return false, fmt.Errorf("Can't get Docker containers for host [%s]: %v", hostname, err) return false, fmt.Errorf("Error checking if container [%s] is running on host [%s]: %v", containerName, hostname, err)
} }
for _, container := range containers { for _, container := range containers {
if len(container.Names) != 0 && container.Names[0] == "/"+containerName { if len(container.Names) != 0 && container.Names[0] == "/"+containerName {
@ -219,20 +228,27 @@ func IsContainerRunning(ctx context.Context, dClient *client.Client, hostname st
} }
func localImageExists(ctx context.Context, dClient *client.Client, hostname string, containerImage string) (bool, error) { func localImageExists(ctx context.Context, dClient *client.Client, hostname string, containerImage string) (bool, error) {
logrus.Debugf("Checking if image [%s] exists on host [%s]", containerImage, hostname) var err error
_, _, err := dClient.ImageInspectWithRaw(ctx, containerImage) for i := 1; i <= RetryCount; i++ {
if err != nil { logrus.Infof("Checking if image [%s] exists on host [%s], try #%d", containerImage, hostname, i)
if client.IsErrNotFound(err) { _, _, err = dClient.ImageInspectWithRaw(ctx, containerImage)
logrus.Debugf("Image [%s] does not exist on host [%s]: %v", containerImage, hostname, err) if err != nil {
return false, nil if client.IsErrNotFound(err) {
logrus.Infof("Image [%s] does not exist on host [%s]: %v", containerImage, hostname, err)
return false, nil
}
logrus.Warnf("Error checking if image [%s] exists on host [%s]: %v", containerImage, hostname, err)
continue
} }
return false, fmt.Errorf("Error checking if image [%s] exists on host [%s]: %v", containerImage, hostname, err) logrus.Infof("Image [%s] exists on host [%s]", containerImage, hostname)
return true, nil
} }
logrus.Debugf("Image [%s] exists on host [%s]", containerImage, hostname) return false, fmt.Errorf("Error checking if image [%s] exists on host [%s]: %v", containerImage, hostname, err)
return true, nil
} }
func pullImage(ctx context.Context, dClient *client.Client, hostname string, containerImage string, prsMap map[string]v3.PrivateRegistry) error { func pullImage(ctx context.Context, dClient *client.Client, hostname string, containerImage string, prsMap map[string]v3.PrivateRegistry) error {
var out io.ReadCloser
var err error
pullOptions := types.ImagePullOptions{} pullOptions := types.ImagePullOptions{}
regAuth, prURL, err := GetImageRegistryConfig(containerImage, prsMap) regAuth, prURL, err := GetImageRegistryConfig(containerImage, prsMap)
@ -244,119 +260,190 @@ func pullImage(ctx context.Context, dClient *client.Client, hostname string, con
} }
pullOptions.RegistryAuth = regAuth pullOptions.RegistryAuth = regAuth
out, err := dClient.ImagePull(ctx, containerImage, pullOptions) // Retry up to RetryCount times to pull image
if err != nil { for i := 1; i <= RetryCount; i++ {
return fmt.Errorf("Can't pull Docker image [%s] for host [%s]: %v", containerImage, hostname, err) logrus.Infof("Pulling image [%s] on host [%s], try #%d", containerImage, hostname, i)
out, err = dClient.ImagePull(ctx, containerImage, pullOptions)
if err != nil {
logrus.Warnf("Can't pull Docker image [%s] on host [%s]: %v", containerImage, hostname, err)
continue
}
defer out.Close()
if logrus.GetLevel() == logrus.DebugLevel {
io.Copy(os.Stdout, out)
} else {
io.Copy(ioutil.Discard, out)
}
return nil
} }
defer out.Close() // If the for loop does not return, return the error
if logrus.GetLevel() == logrus.DebugLevel { return err
io.Copy(os.Stdout, out)
} else {
io.Copy(ioutil.Discard, out)
}
return nil
} }
func UseLocalOrPull(ctx context.Context, dClient *client.Client, hostname string, containerImage string, plane string, prsMap map[string]v3.PrivateRegistry) error { func UseLocalOrPull(ctx context.Context, dClient *client.Client, hostname string, containerImage string, plane string, prsMap map[string]v3.PrivateRegistry) error {
if dClient == nil { if dClient == nil {
return fmt.Errorf("[%s] Failed to use local image or pull: docker client is nil for container [%s] on host [%s]", plane, containerImage, hostname) return fmt.Errorf("[%s] Failed to use local image or pull: docker client is nil for container [%s] on host [%s]", plane, containerImage, hostname)
} }
logrus.Debugf("[%s] Checking image [%s] on host [%s]", plane, containerImage, hostname) var imageExists bool
imageExists, err := localImageExists(ctx, dClient, hostname, containerImage) var err error
if err != nil {
return err // Retry up to RetryCount times to see if image exists
for i := 1; i <= RetryCount; i++ {
// Increasing wait time on retry, but not on the first two try
if i > 2 {
time.Sleep(time.Duration(i) * time.Second)
}
// Check for local image and if error, log and retry
imageExists, err = localImageExists(ctx, dClient, hostname, containerImage)
if err != nil {
logrus.Warnf("[%s] %v", plane, err)
continue
}
// Return if image exists to prevent pulling
if imageExists {
return nil
}
// Try pulling and if error, log and retry
err = pullImage(ctx, dClient, hostname, containerImage, prsMap)
if err != nil {
logrus.Warnf("[%s] Can't pull Docker image [%s] on host [%s]: %v", plane, containerImage, hostname, err)
continue
}
} }
if imageExists { // If the for loop does not return, return the error
logrus.Debugf("[%s] No pull necessary, image [%s] exists on host [%s]", plane, containerImage, hostname) return err
return nil
}
log.Infof(ctx, "[%s] Pulling image [%s] on host [%s]", plane, containerImage, hostname)
if err := pullImage(ctx, dClient, hostname, containerImage, prsMap); err != nil {
return err
}
log.Infof(ctx, "[%s] Successfully pulled image [%s] on host [%s]", plane, containerImage, hostname)
return nil
} }
func RemoveContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error { func RemoveContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error {
if dClient == nil { if dClient == nil {
return fmt.Errorf("Failed to remove container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return fmt.Errorf("Failed to remove container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
err := dClient.ContainerRemove(ctx, containerName, types.ContainerRemoveOptions{Force: true, RemoveVolumes: true}) var err error
if err != nil { // Retry up to RetryCount times to see if image exists
return fmt.Errorf("Can't remove Docker container [%s] for host [%s]: %v", containerName, hostname, err) for i := 1; i <= RetryCount; i++ {
logrus.Infof("Removing container [%s] on host [%s], try #%d", containerName, hostname, i)
err = dClient.ContainerRemove(ctx, containerName, types.ContainerRemoveOptions{Force: true, RemoveVolumes: true})
if err != nil {
logrus.Warningf("Can't remove Docker container [%s] for host [%s]: %v", containerName, hostname, err)
continue
}
return nil
} }
return nil return err
} }
func RestartContainer(ctx context.Context, dClient *client.Client, hostname, containerName string) error { func RestartContainer(ctx context.Context, dClient *client.Client, hostname, containerName string) error {
if dClient == nil { if dClient == nil {
return fmt.Errorf("Failed to restart container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return fmt.Errorf("Failed to restart container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
var err error
restartTimeout := RestartTimeout * time.Second restartTimeout := RestartTimeout * time.Second
err := dClient.ContainerRestart(ctx, containerName, &restartTimeout) // Retry up to RetryCount times to see if image exists
if err != nil { for i := 1; i <= RetryCount; i++ {
return fmt.Errorf("Can't restart Docker container [%s] for host [%s]: %v", containerName, hostname, err) logrus.Infof("Restarting container [%s] on host [%s], try #%d", containerName, hostname, i)
err = dClient.ContainerRestart(ctx, containerName, &restartTimeout)
if err != nil {
logrus.Warningf("Can't restart Docker container [%s] for host [%s]: %v", containerName, hostname, err)
continue
}
return nil
} }
return nil return err
} }
func StopContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error { func StopContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error {
if dClient == nil { if dClient == nil {
return fmt.Errorf("Failed to stop container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return fmt.Errorf("Failed to stop container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
var err error
// define the stop timeout // define the stop timeout
stopTimeoutDuration := StopTimeout * time.Second stopTimeoutDuration := StopTimeout * time.Second
logrus.Debugf("Stopping container [%s] on host [%s] with stopTimeoutDuration [%s]", containerName, hostname, stopTimeoutDuration) // Retry up to RetryCount times to see if image exists
err := dClient.ContainerStop(ctx, containerName, &stopTimeoutDuration) for i := 1; i <= RetryCount; i++ {
if err != nil { logrus.Infof("Stopping container [%s] on host [%s] with stopTimeoutDuration [%s], try #%d", containerName, hostname, stopTimeoutDuration, i)
return fmt.Errorf("Can't stop Docker container [%s] for host [%s]: %v", containerName, hostname, err) err := dClient.ContainerStop(ctx, containerName, &stopTimeoutDuration)
if err != nil {
logrus.Warningf("Can't stop Docker container [%s] for host [%s]: %v", containerName, hostname, err)
continue
}
return nil
} }
return nil return err
} }
func RenameContainer(ctx context.Context, dClient *client.Client, hostname string, oldContainerName string, newContainerName string) error { func RenameContainer(ctx context.Context, dClient *client.Client, hostname string, oldContainerName string, newContainerName string) error {
if dClient == nil { if dClient == nil {
return fmt.Errorf("Failed to rename container: docker client is nil for container [%s] on host [%s]", oldContainerName, hostname) return fmt.Errorf("Failed to rename container: docker client is nil for container [%s] on host [%s]", oldContainerName, hostname)
} }
logrus.Debugf("Renaming container [%s] to [%s] on host [%s]", oldContainerName, newContainerName, hostname) var err error
err := dClient.ContainerRename(ctx, oldContainerName, newContainerName) // Retry up to RetryCount times to see if image exists
if err != nil { for i := 1; i <= RetryCount; i++ {
return fmt.Errorf("Can't rename Docker container [%s] for host [%s]: %v", oldContainerName, hostname, err) logrus.Infof("Renaming container [%s] to [%s] on host [%s], try #%d", oldContainerName, newContainerName, hostname, i)
err = dClient.ContainerRename(ctx, oldContainerName, newContainerName)
if err != nil {
logrus.Warningf("Can't rename Docker container [%s] to [%s] for host [%s]: %v", oldContainerName, newContainerName, hostname, err)
continue
}
return nil
} }
return nil return err
} }
func StartContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error { func StartContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) error {
if dClient == nil { if dClient == nil {
return fmt.Errorf("Failed to start container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return fmt.Errorf("Failed to start container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
if err := dClient.ContainerStart(ctx, containerName, types.ContainerStartOptions{}); err != nil { var err error
return fmt.Errorf("Failed to start [%s] container on host [%s]: %v", containerName, hostname, err) // Retry up to RetryCount times to see if image exists
for i := 1; i <= RetryCount; i++ {
logrus.Infof("Starting container [%s] on host [%s], try #%d", containerName, hostname, i)
err = dClient.ContainerStart(ctx, containerName, types.ContainerStartOptions{})
if err != nil {
logrus.Warningf("Can't start Docker container [%s] on host [%s]: %v", containerName, hostname, err)
continue
}
return nil
} }
return nil return err
} }
func CreateContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string, imageCfg *container.Config, hostCfg *container.HostConfig) (container.ContainerCreateCreatedBody, error) { func CreateContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string, imageCfg *container.Config, hostCfg *container.HostConfig) (container.ContainerCreateCreatedBody, error) {
if dClient == nil { if dClient == nil {
return container.ContainerCreateCreatedBody{}, fmt.Errorf("Failed to create container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return container.ContainerCreateCreatedBody{}, fmt.Errorf("Failed to create container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
created, err := dClient.ContainerCreate(ctx, imageCfg, hostCfg, nil, containerName) var err error
if err != nil { // Retry up to RetryCount times to see if image exists
return container.ContainerCreateCreatedBody{}, fmt.Errorf("Failed to create [%s] container on host [%s]: %v", containerName, hostname, err) for i := 1; i <= RetryCount; i++ {
created, err := dClient.ContainerCreate(ctx, imageCfg, hostCfg, nil, containerName)
if err != nil {
logrus.Warningf("Failed to create Docker container [%s] on host [%s]: %v", containerName, hostname, err)
continue
}
return created, nil
} }
return created, nil return container.ContainerCreateCreatedBody{}, fmt.Errorf("Failed to create Docker container [%s] on host [%s]: %v", containerName, hostname, err)
} }
func InspectContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) (types.ContainerJSON, error) { func InspectContainer(ctx context.Context, dClient *client.Client, hostname string, containerName string) (types.ContainerJSON, error) {
if dClient == nil { if dClient == nil {
return types.ContainerJSON{}, fmt.Errorf("Failed to inspect container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return types.ContainerJSON{}, fmt.Errorf("Failed to inspect container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
inspection, err := dClient.ContainerInspect(ctx, containerName) var err error
if err != nil { // Retry up to RetryCount times to see if image exists
return types.ContainerJSON{}, fmt.Errorf("Failed to inspect [%s] container on host [%s]: %v", containerName, hostname, err) for i := 1; i <= RetryCount; i++ {
inspection, err := dClient.ContainerInspect(ctx, containerName)
if err != nil {
if client.IsErrNotFound(err) {
return types.ContainerJSON{}, err
}
logrus.Warningf("Failed to inspect Docker container [%s] on host [%s]: %v", containerName, hostname, err)
continue
}
return inspection, nil
} }
return inspection, nil return types.ContainerJSON{}, fmt.Errorf("Failed to inspect Docker container [%s] on host [%s]: %v", containerName, hostname, err)
} }
func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname string, oldContainerName string, newContainerName string) error { func StopRenameContainer(ctx context.Context, dClient *client.Client, hostname string, oldContainerName string, newContainerName string) error {
@ -390,7 +477,7 @@ func WaitForContainer(ctx context.Context, dClient *client.Client, hostname stri
// 5 minutes timeout, especially for transferring snapshots // 5 minutes timeout, especially for transferring snapshots
for retries := 0; retries < 300; retries++ { for retries := 0; retries < 300; retries++ {
log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname) log.Infof(ctx, "Waiting for [%s] container to exit on host [%s]", containerName, hostname)
container, err := dClient.ContainerInspect(ctx, containerName) container, err := InspectContainer(ctx, dClient, hostname, containerName)
if err != nil { if err != nil {
return 1, fmt.Errorf("Could not inspect container [%s] on host [%s]: %s", containerName, hostname, err) return 1, fmt.Errorf("Could not inspect container [%s] on host [%s]: %s", containerName, hostname, err)
} }
@ -483,7 +570,16 @@ func ReadContainerLogs(ctx context.Context, dClient *client.Client, containerNam
if dClient == nil { if dClient == nil {
return nil, fmt.Errorf("Failed reading container logs: docker client is nil for container [%s]", containerName) return nil, fmt.Errorf("Failed reading container logs: docker client is nil for container [%s]", containerName)
} }
return dClient.ContainerLogs(ctx, containerName, types.ContainerLogsOptions{Follow: follow, ShowStdout: true, ShowStderr: true, Timestamps: false, Tail: tail}) var err error
for i := 1; i <= RetryCount; i++ {
logs, err := dClient.ContainerLogs(ctx, containerName, types.ContainerLogsOptions{Follow: follow, ShowStdout: true, ShowStderr: true, Timestamps: false, Tail: tail})
if err != nil {
logrus.Warnf("Can't read container logs for container [%s]: %v", containerName, err)
continue
}
return logs, nil
}
return nil, err
} }
func GetContainerLogsStdoutStderr(ctx context.Context, dClient *client.Client, containerName, tail string, follow bool) (string, string, error) { func GetContainerLogsStdoutStderr(ctx context.Context, dClient *client.Client, containerName, tail string, follow bool) (string, string, error) {
@ -571,8 +667,7 @@ func DoRestartContainer(ctx context.Context, dClient *client.Client, containerNa
return fmt.Errorf("Failed to restart container: docker client is nil for container [%s] on host [%s]", containerName, hostname) return fmt.Errorf("Failed to restart container: docker client is nil for container [%s] on host [%s]", containerName, hostname)
} }
logrus.Debugf("[restart/%s] Checking if container is running on host [%s]", containerName, hostname) logrus.Debugf("[restart/%s] Checking if container is running on host [%s]", containerName, hostname)
// not using the wrapper to check if the error is a NotFound error _, err := InspectContainer(ctx, dClient, hostname, containerName)
_, err := dClient.ContainerInspect(ctx, containerName)
if err != nil { if err != nil {
if client.IsErrNotFound(err) { if client.IsErrNotFound(err) {
logrus.Debugf("[restart/%s] Container doesn't exist on host [%s]", containerName, hostname) logrus.Debugf("[restart/%s] Container doesn't exist on host [%s]", containerName, hostname)

View File

@ -10,7 +10,6 @@ import (
"strings" "strings"
"time" "time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/container"
"github.com/rancher/rke/docker" "github.com/rancher/rke/docker"
"github.com/rancher/rke/hosts" "github.com/rancher/rke/hosts"
@ -94,15 +93,15 @@ func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string,
}, },
Privileged: true, Privileged: true,
} }
resp, err := host.DClient.ContainerCreate(ctx, imageCfg, hostCfg, nil, CrtDownloaderContainer) _, err = docker.CreateContainer(ctx, host.DClient, host.Address, CrtDownloaderContainer, imageCfg, hostCfg)
if err != nil { if err != nil {
return fmt.Errorf("Failed to create Certificates deployer container on host [%s]: %v", host.Address, err) return fmt.Errorf("Failed to create Certificates deployer container on host [%s]: %v", host.Address, err)
} }
if err := host.DClient.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil { if err := docker.StartContainer(ctx, host.DClient, host.Address, CrtDownloaderContainer); err != nil {
return fmt.Errorf("Failed to start Certificates deployer container on host [%s]: %v", host.Address, err) return fmt.Errorf("Failed to start Certificates deployer container on host [%s]: %v", host.Address, err)
} }
logrus.Debugf("[certificates] Successfully started Certificate deployer container: %s", resp.ID) logrus.Debugf("[certificates] Successfully started Certificate deployer container: %s", CrtDownloaderContainer)
for { for {
isDeployerRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, CrtDownloaderContainer, false) isDeployerRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, CrtDownloaderContainer, false)
if err != nil { if err != nil {
@ -112,7 +111,7 @@ func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string,
time.Sleep(5 * time.Second) time.Sleep(5 * time.Second)
continue continue
} }
if err := host.DClient.ContainerRemove(ctx, resp.ID, types.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { if err := docker.RemoveContainer(ctx, host.DClient, host.Address, CrtDownloaderContainer); err != nil {
return fmt.Errorf("Failed to delete Certificates deployer container on host [%s]: %v", host.Address, err) return fmt.Errorf("Failed to delete Certificates deployer container on host [%s]: %v", host.Address, err)
} }
return nil return nil

View File

@ -271,7 +271,6 @@ func IsEtcdMember(ctx context.Context, etcdHost *hosts.Host, etcdHosts []*hosts.
} }
func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, once bool, es v3.ETCDService) error { func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, etcdSnapshotImage string, name string, once bool, es v3.ETCDService) error {
log.Infof(ctx, "[etcd] Saving snapshot [%s] on host [%s]", name, etcdHost.Address)
backupCmd := "etcd-backup" backupCmd := "etcd-backup"
restartPolicy := "always" restartPolicy := "always"
imageCfg := &container.Config{ imageCfg := &container.Config{
@ -288,9 +287,11 @@ func RunEtcdSnapshotSave(ctx context.Context, etcdHost *hosts.Host, prsMap map[s
Image: etcdSnapshotImage, Image: etcdSnapshotImage,
} }
if once { if once {
log.Infof(ctx, "[etcd] Running snapshot save once on host [%s]", etcdHost.Address)
imageCfg.Cmd = append(imageCfg.Cmd, "--once") imageCfg.Cmd = append(imageCfg.Cmd, "--once")
restartPolicy = "no" restartPolicy = "no"
} else if es.BackupConfig == nil { } else if es.BackupConfig == nil {
log.Infof(ctx, "[etcd] Running snapshot container [%s] on host [%s]", EtcdSnapshotOnceContainerName, etcdHost.Address)
imageCfg.Cmd = append(imageCfg.Cmd, "--retention="+es.Retention) imageCfg.Cmd = append(imageCfg.Cmd, "--retention="+es.Retention)
imageCfg.Cmd = append(imageCfg.Cmd, "--creation="+es.Creation) imageCfg.Cmd = append(imageCfg.Cmd, "--creation="+es.Creation)
} }