From 5c90908eb07b13ce1107f461c573d2a8b3fb30cf Mon Sep 17 00:00:00 2001 From: Yu-Ju Hong Date: Fri, 11 Nov 2016 16:09:23 -0800 Subject: [PATCH] dockershim: remove container upon naming conflicts We have observed that, after failing to create a container due to "device or resource busy", docker may end up having inconsistent internal state. One symptom is that docker will not report the existence of the "failed to create" container, but if kubelet tries to create a new container with the same name, docker will error out with a naming conflict message. To work around this, this commit parses the creation error message and if there is a naming conflict, it would attempt to remove the existing container. --- pkg/kubelet/dockershim/BUILD | 1 + pkg/kubelet/dockershim/docker_container.go | 2 ++ pkg/kubelet/dockershim/docker_sandbox.go | 2 ++ pkg/kubelet/dockershim/helpers.go | 32 ++++++++++++++++++++++ pkg/kubelet/dockershim/helpers_test.go | 10 +++++++ 5 files changed, 47 insertions(+) diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index cba56f896e4..9b73ad4b990 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -84,5 +84,6 @@ go_test( "//vendor:github.com/docker/engine-api/types/container", "//vendor:github.com/golang/mock/gomock", "//vendor:github.com/stretchr/testify/assert", + "//vendor:github.com/stretchr/testify/require", ], ) diff --git a/pkg/kubelet/dockershim/docker_container.go b/pkg/kubelet/dockershim/docker_container.go index 257807d9285..dc04ace491b 100644 --- a/pkg/kubelet/dockershim/docker_container.go +++ b/pkg/kubelet/dockershim/docker_container.go @@ -174,6 +174,8 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeApi createConfig.HostConfig = hc createResp, err := ds.client.CreateContainer(createConfig) + recoverFromConflictIfNeeded(ds.client, err) + if createResp != nil { return createResp.ID, err } diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index 689d937d283..88fcb4716cd 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -68,6 +68,8 @@ func (ds *dockerService) RunPodSandbox(config *runtimeApi.PodSandboxConfig) (str return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.GetName(), err) } createResp, err := ds.client.CreateContainer(*createConfig) + recoverFromConflictIfNeeded(ds.client, err) + if err != nil || createResp == nil { return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.GetName(), err) } diff --git a/pkg/kubelet/dockershim/helpers.go b/pkg/kubelet/dockershim/helpers.go index 936a590363f..ee2440dbfa5 100644 --- a/pkg/kubelet/dockershim/helpers.go +++ b/pkg/kubelet/dockershim/helpers.go @@ -18,6 +18,7 @@ package dockershim import ( "fmt" + "regexp" "strconv" "strings" @@ -37,6 +38,10 @@ const ( annotationPrefix = "annotation." ) +var ( + conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`) +) + // apiVersion implements kubecontainer.Version interface by implementing // Compare() and String(). It uses the compare function of engine-api to // compare docker apiversions. @@ -282,3 +287,30 @@ func getUserFromImageUser(imageUser string) (*int64, *string) { // If user is a numeric uid. return &uid, nil } + +// See #33189. If the previous attempt to create a sandbox container name FOO +// failed due to "device or resource busy", it is possbile that docker did +// not clean up properly and has inconsistent internal state. Docker would +// not report the existence of FOO, but would complain if user wants to +// create a new container named FOO. To work around this, we parse the error +// message to identify failure caused by naming conflict, and try to remove +// the old container FOO. +// TODO(#33189): Monitor the tests to see if the fix is sufficent. +func recoverFromConflictIfNeeded(client dockertools.DockerInterface, err error) { + if err == nil { + return + } + + matches := conflictRE.FindStringSubmatch(err.Error()) + if len(matches) != 2 { + return + } + + id := matches[1] + glog.Warningf("Unable to create pod sandbox due to conflict. Attempting to remove sandbox %q", id) + if err := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { + glog.Errorf("Failed to remove the conflicting sandbox container: %v", err) + } else { + glog.V(2).Infof("Successfully removed conflicting sandbox %q", id) + } +} diff --git a/pkg/kubelet/dockershim/helpers_test.go b/pkg/kubelet/dockershim/helpers_test.go index 0fa927f1a68..9fb0daebc37 100644 --- a/pkg/kubelet/dockershim/helpers_test.go +++ b/pkg/kubelet/dockershim/helpers_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "k8s.io/kubernetes/pkg/api" runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" @@ -227,3 +228,12 @@ func TestGetUserFromImageUser(t *testing.T) { assert.Equal(t, test.name, actualName) } } + +func TestParsingCreationConflictError(t *testing.T) { + // Expected error message from docker. + msg := "Conflict. The name \"/k8s_POD_pfpod_e2e-tests-port-forwarding-dlxt2_81a3469e-99e1-11e6-89f2-42010af00002_0\" is already in use by container 24666ab8c814d16f986449e504ea0159468ddf8da01897144a770f66dce0e14e. You have to remove (or rename) that container to be able to reuse that name." + + matches := conflictRE.FindStringSubmatch(msg) + require.Len(t, matches, 2) + require.Equal(t, matches[1], "24666ab8c814d16f986449e504ea0159468ddf8da01897144a770f66dce0e14e") +}