diff --git a/pkg/kubelet/dockershim/docker_container.go b/pkg/kubelet/dockershim/docker_container.go index 5fb38c4d71c..ee7a8dfaf5c 100644 --- a/pkg/kubelet/dockershim/docker_container.go +++ b/pkg/kubelet/dockershim/docker_container.go @@ -176,7 +176,9 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeapi createConfig.HostConfig = hc createResp, err := ds.client.CreateContainer(createConfig) - recoverFromConflictIfNeeded(ds.client, err) + if err != nil { + createResp, err = recoverFromCreationConflictIfNeeded(ds.client, createConfig, err) + } if createResp != nil { return createResp.ID, err diff --git a/pkg/kubelet/dockershim/docker_container_test.go b/pkg/kubelet/dockershim/docker_container_test.go index ebabc71e86a..ab2cfedf872 100644 --- a/pkg/kubelet/dockershim/docker_container_test.go +++ b/pkg/kubelet/dockershim/docker_container_test.go @@ -19,6 +19,7 @@ package dockershim import ( "fmt" "path/filepath" + "strings" "testing" "time" @@ -215,3 +216,69 @@ func TestContainerLogPath(t *testing.T) { assert.NoError(t, err) assert.Equal(t, fakeOS.Removes, []string{kubeletContainerLogPath}) } + +// TestContainerCreationConflict tests the logic to work around docker container +// creation naming conflict bug. +func TestContainerCreationConflict(t *testing.T) { + sConfig := makeSandboxConfig("foo", "bar", "1", 0) + config := makeContainerConfig(sConfig, "pause", "iamimage", 0, map[string]string{}, map[string]string{}) + containerName := makeContainerName(sConfig, config) + const sandboxId = "sandboxid" + const containerId = "containerid" + conflictError := fmt.Errorf("Error response from daemon: Conflict. The name \"/%s\" is already in use by container %s. You have to remove (or rename) that container to be able to reuse that name.", + containerName, containerId) + noContainerError := fmt.Errorf("Error response from daemon: No such container: %s", containerId) + randomError := fmt.Errorf("random error") + + for desc, test := range map[string]struct { + createError error + removeError error + expectError error + expectCalls []string + expectFields int + }{ + "no create error": { + expectCalls: []string{"create"}, + expectFields: 6, + }, + "random create error": { + createError: randomError, + expectError: randomError, + expectCalls: []string{"create"}, + expectFields: 1, + }, + "conflict create error with successful remove": { + createError: conflictError, + expectError: conflictError, + expectCalls: []string{"create", "remove"}, + expectFields: 1, + }, + "conflict create error with random remove error": { + createError: conflictError, + removeError: randomError, + expectError: conflictError, + expectCalls: []string{"create", "remove"}, + expectFields: 1, + }, + "conflict create error with no such container remove error": { + createError: conflictError, + removeError: noContainerError, + expectCalls: []string{"create", "remove", "create"}, + expectFields: 7, + }, + } { + t.Logf("TestCase: %s", desc) + ds, fDocker, _ := newTestDockerService() + + if test.createError != nil { + fDocker.InjectError("create", test.createError) + } + if test.removeError != nil { + fDocker.InjectError("remove", test.removeError) + } + name, err := ds.CreateContainer(sandboxId, config, sConfig) + assert.Equal(t, test.expectError, err) + assert.NoError(t, fDocker.AssertCalls(test.expectCalls)) + assert.Len(t, strings.Split(name, nameDelimiter), test.expectFields) + } +} diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index 03da38451bd..def3268883f 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -69,7 +69,9 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err) } createResp, err := ds.client.CreateContainer(*createConfig) - recoverFromConflictIfNeeded(ds.client, err) + if err != nil { + createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err) + } if err != nil || createResp == nil { return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err) diff --git a/pkg/kubelet/dockershim/helpers.go b/pkg/kubelet/dockershim/helpers.go index 5698b5cf704..a83e9de33a8 100644 --- a/pkg/kubelet/dockershim/helpers.go +++ b/pkg/kubelet/dockershim/helpers.go @@ -39,7 +39,8 @@ const ( ) var ( - conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`) + conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`) + noContainerRE = regexp.MustCompile(`No such container: [0-9a-z]+`) ) // apiVersion implements kubecontainer.Version interface by implementing @@ -295,22 +296,31 @@ func getUserFromImageUser(imageUser string) (*int64, string) { // create a new container named FOO. To work around this, we parse the error // message to identify failure caused by naming conflict, and try to remove // the old container FOO. +// See #40443. Sometimes even removal may fail with "no such container" error. +// In that case we have to create the container with a randomized name. +// TODO(random-liu): Remove this work around after docker 1.11 is deprecated. // TODO(#33189): Monitor the tests to see if the fix is sufficent. -func recoverFromConflictIfNeeded(client dockertools.DockerInterface, err error) { - if err == nil { - return - } - +func recoverFromCreationConflictIfNeeded(client dockertools.DockerInterface, createConfig dockertypes.ContainerCreateConfig, err error) (*dockertypes.ContainerCreateResponse, error) { matches := conflictRE.FindStringSubmatch(err.Error()) if len(matches) != 2 { - return + return nil, err } id := matches[1] glog.Warningf("Unable to create pod sandbox due to conflict. Attempting to remove sandbox %q", id) - if err := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { - glog.Errorf("Failed to remove the conflicting sandbox container: %v", err) + if rmErr := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); rmErr == nil { + glog.V(2).Infof("Successfully removed conflicting container %q", id) + return nil, err } else { - glog.V(2).Infof("Successfully removed conflicting sandbox %q", id) + glog.Errorf("Failed to remove the conflicting container %q: %v", id, rmErr) + // Return if the error is not "No such container" error. + if !noContainerRE.MatchString(rmErr.Error()) { + return nil, err + } } + + // randomize the name to avoid conflict. + createConfig.Name = randomizeName(createConfig.Name) + glog.V(2).Infof("Create the container with randomized name %s", createConfig.Name) + return client.CreateContainer(createConfig) } diff --git a/pkg/kubelet/dockershim/helpers_test.go b/pkg/kubelet/dockershim/helpers_test.go index d5900cf19a1..aef94951fe1 100644 --- a/pkg/kubelet/dockershim/helpers_test.go +++ b/pkg/kubelet/dockershim/helpers_test.go @@ -236,3 +236,12 @@ func TestParsingCreationConflictError(t *testing.T) { require.Len(t, matches, 2) require.Equal(t, matches[1], "24666ab8c814d16f986449e504ea0159468ddf8da01897144a770f66dce0e14e") } + +func TestParsingRemovalNoContainerError(t *testing.T) { + // Expected error message from docker. + match := "Error response from daemon: No such container: 96e914f31579e44fe49b239266385330a9b2125abeb9254badd9fca74580c95a" + notMatch := "Error response from daemon: Other errors" + + assert.True(t, noContainerRE.MatchString(match)) + assert.False(t, noContainerRE.MatchString(notMatch)) +} diff --git a/pkg/kubelet/dockershim/naming.go b/pkg/kubelet/dockershim/naming.go index 062b012d214..4538ac5c97b 100644 --- a/pkg/kubelet/dockershim/naming.go +++ b/pkg/kubelet/dockershim/naming.go @@ -18,6 +18,7 @@ package dockershim import ( "fmt" + "math/rand" "strconv" "strings" @@ -78,6 +79,15 @@ func makeContainerName(s *runtimeapi.PodSandboxConfig, c *runtimeapi.ContainerCo } +// randomizeName randomizes the container name. This should only be used when we hit the +// docker container name conflict bug. +func randomizeName(name string) string { + return strings.Join([]string{ + name, + fmt.Sprintf("%08x", rand.Uint32()), + }, nameDelimiter) +} + func parseUint32(s string) (uint32, error) { n, err := strconv.ParseUint(s, 10, 32) if err != nil { @@ -92,7 +102,9 @@ func parseSandboxName(name string) (*runtimeapi.PodSandboxMetadata, error) { name = strings.TrimPrefix(name, "/") parts := strings.Split(name, nameDelimiter) - if len(parts) != 6 { + // Tolerate the random suffix. + // TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated. + if len(parts) != 6 && len(parts) != 7 { return nil, fmt.Errorf("failed to parse the sandbox name: %q", name) } if parts[0] != kubePrefix { @@ -118,7 +130,9 @@ func parseContainerName(name string) (*runtimeapi.ContainerMetadata, error) { name = strings.TrimPrefix(name, "/") parts := strings.Split(name, nameDelimiter) - if len(parts) != 6 { + // Tolerate the random suffix. + // TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated. + if len(parts) != 6 && len(parts) != 7 { return nil, fmt.Errorf("failed to parse the container name: %q", name) } if parts[0] != kubePrefix { diff --git a/pkg/kubelet/dockershim/naming_test.go b/pkg/kubelet/dockershim/naming_test.go index 9b70cbb73d1..23f8f632f0c 100644 --- a/pkg/kubelet/dockershim/naming_test.go +++ b/pkg/kubelet/dockershim/naming_test.go @@ -82,3 +82,25 @@ func TestNonParsableContainerNames(t *testing.T) { _, err = parseContainerName("k8s_frontend_foo_bar_iamuid_notanumber") assert.Error(t, err) } + +func TestParseRandomizedNames(t *testing.T) { + // Test randomized sandbox name. + sConfig := makeSandboxConfig("foo", "bar", "iamuid", 3) + sActualName := randomizeName(makeSandboxName(sConfig)) + sActualMetadata, err := parseSandboxName(sActualName) + assert.NoError(t, err) + assert.Equal(t, sConfig.Metadata, sActualMetadata) + + // Test randomized container name. + name, attempt := "pause", uint32(5) + config := &runtimeapi.ContainerConfig{ + Metadata: &runtimeapi.ContainerMetadata{ + Name: name, + Attempt: attempt, + }, + } + actualName := randomizeName(makeContainerName(sConfig, config)) + actualMetadata, err := parseContainerName(actualName) + assert.NoError(t, err) + assert.Equal(t, config.Metadata, actualMetadata) +}