diff --git a/pkg/kubelet/dockershim/docker_container.go b/pkg/kubelet/dockershim/docker_container.go index 5fb38c4d71c..ee7a8dfaf5c 100644 --- a/pkg/kubelet/dockershim/docker_container.go +++ b/pkg/kubelet/dockershim/docker_container.go @@ -176,7 +176,9 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeapi createConfig.HostConfig = hc createResp, err := ds.client.CreateContainer(createConfig) - recoverFromConflictIfNeeded(ds.client, err) + if err != nil { + createResp, err = recoverFromCreationConflictIfNeeded(ds.client, createConfig, err) + } if createResp != nil { return createResp.ID, err diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index 03da38451bd..def3268883f 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -69,7 +69,9 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err) } createResp, err := ds.client.CreateContainer(*createConfig) - recoverFromConflictIfNeeded(ds.client, err) + if err != nil { + createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err) + } if err != nil || createResp == nil { return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err) diff --git a/pkg/kubelet/dockershim/helpers.go b/pkg/kubelet/dockershim/helpers.go index 5698b5cf704..a83e9de33a8 100644 --- a/pkg/kubelet/dockershim/helpers.go +++ b/pkg/kubelet/dockershim/helpers.go @@ -39,7 +39,8 @@ const ( ) var ( - conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`) + conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`) + noContainerRE = regexp.MustCompile(`No such container: [0-9a-z]+`) ) // apiVersion implements kubecontainer.Version interface by implementing @@ -295,22 +296,31 @@ func getUserFromImageUser(imageUser string) (*int64, string) { // create a new container named FOO. To work around this, we parse the error // message to identify failure caused by naming conflict, and try to remove // the old container FOO. +// See #40443. Sometimes even removal may fail with "no such container" error. +// In that case we have to create the container with a randomized name. +// TODO(random-liu): Remove this work around after docker 1.11 is deprecated. // TODO(#33189): Monitor the tests to see if the fix is sufficent. -func recoverFromConflictIfNeeded(client dockertools.DockerInterface, err error) { - if err == nil { - return - } - +func recoverFromCreationConflictIfNeeded(client dockertools.DockerInterface, createConfig dockertypes.ContainerCreateConfig, err error) (*dockertypes.ContainerCreateResponse, error) { matches := conflictRE.FindStringSubmatch(err.Error()) if len(matches) != 2 { - return + return nil, err } id := matches[1] glog.Warningf("Unable to create pod sandbox due to conflict. Attempting to remove sandbox %q", id) - if err := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { - glog.Errorf("Failed to remove the conflicting sandbox container: %v", err) + if rmErr := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); rmErr == nil { + glog.V(2).Infof("Successfully removed conflicting container %q", id) + return nil, err } else { - glog.V(2).Infof("Successfully removed conflicting sandbox %q", id) + glog.Errorf("Failed to remove the conflicting container %q: %v", id, rmErr) + // Return if the error is not "No such container" error. + if !noContainerRE.MatchString(rmErr.Error()) { + return nil, err + } } + + // randomize the name to avoid conflict. + createConfig.Name = randomizeName(createConfig.Name) + glog.V(2).Infof("Create the container with randomized name %s", createConfig.Name) + return client.CreateContainer(createConfig) } diff --git a/pkg/kubelet/dockershim/naming.go b/pkg/kubelet/dockershim/naming.go index 062b012d214..4538ac5c97b 100644 --- a/pkg/kubelet/dockershim/naming.go +++ b/pkg/kubelet/dockershim/naming.go @@ -18,6 +18,7 @@ package dockershim import ( "fmt" + "math/rand" "strconv" "strings" @@ -78,6 +79,15 @@ func makeContainerName(s *runtimeapi.PodSandboxConfig, c *runtimeapi.ContainerCo } +// randomizeName randomizes the container name. This should only be used when we hit the +// docker container name conflict bug. +func randomizeName(name string) string { + return strings.Join([]string{ + name, + fmt.Sprintf("%08x", rand.Uint32()), + }, nameDelimiter) +} + func parseUint32(s string) (uint32, error) { n, err := strconv.ParseUint(s, 10, 32) if err != nil { @@ -92,7 +102,9 @@ func parseSandboxName(name string) (*runtimeapi.PodSandboxMetadata, error) { name = strings.TrimPrefix(name, "/") parts := strings.Split(name, nameDelimiter) - if len(parts) != 6 { + // Tolerate the random suffix. + // TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated. + if len(parts) != 6 && len(parts) != 7 { return nil, fmt.Errorf("failed to parse the sandbox name: %q", name) } if parts[0] != kubePrefix { @@ -118,7 +130,9 @@ func parseContainerName(name string) (*runtimeapi.ContainerMetadata, error) { name = strings.TrimPrefix(name, "/") parts := strings.Split(name, nameDelimiter) - if len(parts) != 6 { + // Tolerate the random suffix. + // TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated. + if len(parts) != 6 && len(parts) != 7 { return nil, fmt.Errorf("failed to parse the container name: %q", name) } if parts[0] != kubePrefix {