Merge pull request #62874 from dcbw/dockershim-SetUpPod-cleanup-on-failure

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

dockershim/sandbox: clean up pod network even if SetUpPod() failed

If the CNI network plugin completes successfully, but something fails
between that success and dockershim's sandbox setup code, plugin resources
may not be cleaned up. A non-trivial amount of code runs after the
plugin itself exits and the CNI driver's SetUpPod() returns, and any error
condition recognized by that code would cause this leakage.

The Kubernetes CRI RunPodSandbox() caller does not attempt to clean
up on errors, since it cannot know how much (if any) networking
was actually set up. It depends on the CRI implementation to do
that cleanup for it.
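
For illustration, a minimal sketch of the caller's position (hypothetical names, not the actual kubelet code): the CRI response carries no record of partial network setup, so the caller has nothing to act on.

```go
package main

import "fmt"

// RuntimeService is a stand-in for the CRI runtime interface; only the
// call shape matters here (hypothetical sketch, not the kubelet code).
type RuntimeService interface {
	RunPodSandbox(podName string) (sandboxID string, err error)
}

// createSandbox shows the caller's position: on failure it can only
// report the error, because the CRI response says nothing about how
// much networking (if any) was configured before things went wrong.
func createSandbox(rs RuntimeService, podName string) (string, error) {
	id, err := rs.RunPodSandbox(podName)
	if err != nil {
		// No teardown is possible here; cleanup must already have
		// happened inside the CRI implementation.
		return "", fmt.Errorf("CreatePodSandbox for pod %q failed: %v", podName, err)
	}
	return id, nil
}

func main() {}
```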

In the dockershim case, a SetUpPod() failure means networkReady is
FALSE for the sandbox, and TearDownPod() will not be called later by
garbage collection even though networking may actually have been
configured, because dockershim can't know how far SetUpPod() got.
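
A minimal, self-contained sketch of that gate (illustrative names; the real dockershim tracks readiness in a mutex-protected map on dockerService):

```go
package main

import "fmt"

// networkReady mirrors dockershim's per-sandbox flag (illustrative only).
var networkReady = map[string]bool{}

// tearDownPod stands in for the CNI driver's TearDownPod.
func tearDownPod(id string) error {
	fmt.Printf("tearing down network for sandbox %s\n", id)
	return nil
}

// stopPodSandbox sketches the gate described above: when SetUpPod()
// failed, networkReady is false and teardown is skipped entirely,
// leaking whatever the plugin had already allocated.
func stopPodSandbox(id string) error {
	if !networkReady[id] {
		return nil // nothing torn down, even if the plugin partially succeeded
	}
	return tearDownPod(id)
}

func main() {
	networkReady["sandbox-a"] = false // SetUpPod() reported an error
	_ = stopPodSandbox("sandbox-a")   // TearDownPod never runs
}
```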

Concrete examples include the sandbox's container somehow being
removed during that window, some other OS error being encountered, or
the plugin returning a malformed result to the CNI driver.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1532965

```release-note
NONE
```

pkg/kubelet/dockershim/docker_sandbox.go:

```diff
@@ -163,12 +163,24 @@ func (ds *dockerService) RunPodSandbox(ctx context.Context, r *runtimeapi.RunPod
 	cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
 	err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
 	if err != nil {
-		// TODO(random-liu): Do we need to teardown network here?
-		if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
-			glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
+		errList := []error{fmt.Errorf("failed to set up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err)}
+
+		// Ensure network resources are cleaned up even if the plugin
+		// succeeded but an error happened between that success and here.
+		err = ds.network.TearDownPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID)
+		if err != nil {
+			errList = append(errList, fmt.Errorf("failed to clean up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err))
+		}
+
+		err = ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod)
+		if err != nil {
+			errList = append(errList, fmt.Errorf("failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err))
 		}
+
+		return resp, utilerrors.NewAggregate(errList)
 	}
-	return resp, err
+
+	return resp, nil
 }
 
 // StopPodSandbox stops the sandbox. If there are any running containers in the
```
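
The error path above aggregates every failure instead of surfacing only the first. utilerrors is k8s.io/apimachinery/pkg/util/errors; a small standalone example of how NewAggregate behaves:

```go
package main

import (
	"fmt"

	utilerrors "k8s.io/apimachinery/pkg/util/errors"
)

func main() {
	errList := []error{
		fmt.Errorf("failed to set up sandbox container network"),
		fmt.Errorf("failed to stop sandbox container"),
	}
	// NewAggregate flattens the slice into one error; it returns nil
	// for an empty slice, so a clean run needs no special casing.
	fmt.Println(utilerrors.NewAggregate(errList))
	// [failed to set up sandbox container network, failed to stop sandbox container]
	fmt.Println(utilerrors.NewAggregate(nil) == nil) // true
}
```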

pkg/kubelet/dockershim/docker_sandbox_test.go:

```diff
@@ -277,6 +277,8 @@ func TestSetUpPodFailure(t *testing.T) {
 	cID := kubecontainer.ContainerID{Type: runtimeName, ID: libdocker.GetFakeContainerID(fmt.Sprintf("/%v", makeSandboxName(c)))}
 	mockPlugin.EXPECT().Name().Return("mockNetworkPlugin").AnyTimes()
 	mockPlugin.EXPECT().SetUpPod(ns, name, cID).Return(errors.New("setup pod error")).AnyTimes()
+	// If SetUpPod() fails, we expect TearDownPod() to immediately follow
+	mockPlugin.EXPECT().TearDownPod(ns, name, cID)
 	// Assume network plugin doesn't return error, dockershim should still be able to return not ready correctly.
 	mockPlugin.EXPECT().GetPodNetworkStatus(ns, name, cID).Return(&network.PodNetworkStatus{IP: net.IP("127.0.0.01")}, nil).AnyTimes()
```
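
Because the new TearDownPod() expectation carries no AnyTimes() modifier, gomock requires exactly one matching call, so the test fails if the error path skips teardown. The same assertion sketched against a hand-rolled fake, to keep the example self-contained (illustrative names, not the dockershim test helpers):

```go
package main

import (
	"errors"
	"fmt"
)

// fakePlugin counts TearDownPod calls so the check below can assert
// that a SetUpPod failure is followed by exactly one teardown. It is
// an illustrative stand-in for the gomock expectations in the real test.
type fakePlugin struct {
	tearDownCalls int
}

func (p *fakePlugin) SetUpPod(ns, name string) error {
	return errors.New("setup pod error")
}

func (p *fakePlugin) TearDownPod(ns, name string) error {
	p.tearDownCalls++
	return nil
}

// runPodSandbox mimics the fixed control flow: tear down on setup failure.
func runPodSandbox(p *fakePlugin, ns, name string) error {
	if err := p.SetUpPod(ns, name); err != nil {
		_ = p.TearDownPod(ns, name)
		return err
	}
	return nil
}

func main() {
	p := &fakePlugin{}
	_ = runPodSandbox(p, "default", "nginx")
	fmt.Println("TearDownPod calls:", p.tearDownCalls) // want exactly 1
}
```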