From 91321ef85b58e42bfd56011bb96e0052c88ac9d7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Apr 2018 14:01:18 -0500 Subject: [PATCH] dockershim/sandbox: clean up pod network even if SetUpPod() failed If the CNI network plugin completes successfully, but something fails between that success and dockerhsim's sandbox setup code, plugin resources may not be cleaned up. A non-trivial amount of code runs after the plugin itself exits and the CNI driver's SetUpPod() returns, and any error condition recognized by that code would cause this leakage. The Kubernetes CRI RunPodSandbox() request does not attempt to clean up on errors, since it cannot know how much (if any) networking was actually set up. It depends on the CRI implementation to do that cleanup for it. In the dockershim case, a SetUpPod() failure means networkReady is FALSE for the sandbox, and TearDownPod() will not be called later by garbage collection even though networking was configured, because dockershim can't know how far SetUpPod() got. Concrete examples include if the sandbox's container is somehow removed during during that time, or another OS error is encountered, or the plugin returns a malformed result to the CNI driver. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1532965 --- pkg/kubelet/dockershim/docker_sandbox.go | 20 +++++++++++++++---- pkg/kubelet/dockershim/docker_sandbox_test.go | 2 ++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index a9bd36789b4..487193e69b7 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -163,12 +163,24 @@ func (ds *dockerService) RunPodSandbox(ctx context.Context, r *runtimeapi.RunPod cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID) err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations) if err != nil { - // TODO(random-liu): Do we need to teardown network here? - if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil { - glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err) + errList := []error{fmt.Errorf("failed to set up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err)} + + // Ensure network resources are cleaned up even if the plugin + // succeeded but an error happened between that success and here. + err = ds.network.TearDownPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID) + if err != nil { + errList = append(errList, fmt.Errorf("failed to clean up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err)) } + + err = ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod) + if err != nil { + errList = append(errList, fmt.Errorf("failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)) + } + + return resp, utilerrors.NewAggregate(errList) } - return resp, err + + return resp, nil } // StopPodSandbox stops the sandbox. If there are any running containers in the diff --git a/pkg/kubelet/dockershim/docker_sandbox_test.go b/pkg/kubelet/dockershim/docker_sandbox_test.go index e7d3796a882..520e5091a7f 100644 --- a/pkg/kubelet/dockershim/docker_sandbox_test.go +++ b/pkg/kubelet/dockershim/docker_sandbox_test.go @@ -277,6 +277,8 @@ func TestSetUpPodFailure(t *testing.T) { cID := kubecontainer.ContainerID{Type: runtimeName, ID: libdocker.GetFakeContainerID(fmt.Sprintf("/%v", makeSandboxName(c)))} mockPlugin.EXPECT().Name().Return("mockNetworkPlugin").AnyTimes() mockPlugin.EXPECT().SetUpPod(ns, name, cID).Return(errors.New("setup pod error")).AnyTimes() + // If SetUpPod() fails, we expect TearDownPod() to immediately follow + mockPlugin.EXPECT().TearDownPod(ns, name, cID) // Assume network plugin doesn't return error, dockershim should still be able to return not ready correctly. mockPlugin.EXPECT().GetPodNetworkStatus(ns, name, cID).Return(&network.PodNetworkStatus{IP: net.IP("127.0.0.01")}, nil).AnyTimes()