mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Merge pull request #36768 from yujuhong/recover_from_failure
Automatic merge from submit-queue dockershim: remove sandbox container upon naming conflicts We have observed that, after failing to create a container due to "device or resource busy", docker may end up having inconsistent internal state. One symptom is that docker will not report the existence of the "failed to create" container, but if kubelet tries to create a new container with the same name, docker will error out with a naming conflict message. To work around this, this commit parses the creation error message and if there is a naming conflict, it would attempt to remove the existing container.
This commit is contained in:
commit
1085ce6081
@ -84,5 +84,6 @@ go_test(
|
||||
"//vendor:github.com/docker/engine-api/types/container",
|
||||
"//vendor:github.com/golang/mock/gomock",
|
||||
"//vendor:github.com/stretchr/testify/assert",
|
||||
"//vendor:github.com/stretchr/testify/require",
|
||||
],
|
||||
)
|
||||
|
@ -174,6 +174,8 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeApi
|
||||
|
||||
createConfig.HostConfig = hc
|
||||
createResp, err := ds.client.CreateContainer(createConfig)
|
||||
recoverFromConflictIfNeeded(ds.client, err)
|
||||
|
||||
if createResp != nil {
|
||||
return createResp.ID, err
|
||||
}
|
||||
|
@ -68,6 +68,8 @@ func (ds *dockerService) RunPodSandbox(config *runtimeApi.PodSandboxConfig) (str
|
||||
return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.GetName(), err)
|
||||
}
|
||||
createResp, err := ds.client.CreateContainer(*createConfig)
|
||||
recoverFromConflictIfNeeded(ds.client, err)
|
||||
|
||||
if err != nil || createResp == nil {
|
||||
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.GetName(), err)
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ package dockershim
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@ -37,6 +38,10 @@ const (
|
||||
annotationPrefix = "annotation."
|
||||
)
|
||||
|
||||
var (
|
||||
conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`)
|
||||
)
|
||||
|
||||
// apiVersion implements kubecontainer.Version interface by implementing
|
||||
// Compare() and String(). It uses the compare function of engine-api to
|
||||
// compare docker apiversions.
|
||||
@ -282,3 +287,30 @@ func getUserFromImageUser(imageUser string) (*int64, *string) {
|
||||
// If user is a numeric uid.
|
||||
return &uid, nil
|
||||
}
|
||||
|
||||
// See #33189. If the previous attempt to create a sandbox container name FOO
|
||||
// failed due to "device or resource busy", it is possbile that docker did
|
||||
// not clean up properly and has inconsistent internal state. Docker would
|
||||
// not report the existence of FOO, but would complain if user wants to
|
||||
// create a new container named FOO. To work around this, we parse the error
|
||||
// message to identify failure caused by naming conflict, and try to remove
|
||||
// the old container FOO.
|
||||
// TODO(#33189): Monitor the tests to see if the fix is sufficent.
|
||||
func recoverFromConflictIfNeeded(client dockertools.DockerInterface, err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
matches := conflictRE.FindStringSubmatch(err.Error())
|
||||
if len(matches) != 2 {
|
||||
return
|
||||
}
|
||||
|
||||
id := matches[1]
|
||||
glog.Warningf("Unable to create pod sandbox due to conflict. Attempting to remove sandbox %q", id)
|
||||
if err := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil {
|
||||
glog.Errorf("Failed to remove the conflicting sandbox container: %v", err)
|
||||
} else {
|
||||
glog.V(2).Infof("Successfully removed conflicting sandbox %q", id)
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||
@ -227,3 +228,12 @@ func TestGetUserFromImageUser(t *testing.T) {
|
||||
assert.Equal(t, test.name, actualName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsingCreationConflictError(t *testing.T) {
|
||||
// Expected error message from docker.
|
||||
msg := "Conflict. The name \"/k8s_POD_pfpod_e2e-tests-port-forwarding-dlxt2_81a3469e-99e1-11e6-89f2-42010af00002_0\" is already in use by container 24666ab8c814d16f986449e504ea0159468ddf8da01897144a770f66dce0e14e. You have to remove (or rename) that container to be able to reuse that name."
|
||||
|
||||
matches := conflictRE.FindStringSubmatch(msg)
|
||||
require.Len(t, matches, 2)
|
||||
require.Equal(t, matches[1], "24666ab8c814d16f986449e504ea0159468ddf8da01897144a770f66dce0e14e")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user