Merge pull request #42767 from Random-Liu/cleanup-infra-container-on-error

Automatic merge from submit-queue (batch tested with PRs 42768, 42760, 42771, 42767)

Stop the sandbox container when hitting a network error.

Fixes https://github.com/kubernetes/kubernetes/issues/42698.

This PR stops the sandbox container when hitting a network error.
This PR also adds a unit test for it.

I'm not sure whether we should try teardown pod network after `SetUpPod` failure. We don't do that in dockertools https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/dockertools/docker_manager.go#L2276.

@yujuhong @freehan
This commit is contained in:
Kubernetes Submit Queue 2017-03-09 00:08:01 -08:00 committed by GitHub
commit 4cf553f78e
2 changed files with 52 additions and 1 deletions

View File

@ -105,7 +105,12 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
// recognized by the CNI standard yet.
cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
// TODO: Do we need to teardown on failure or can we rely on a StopPodSandbox call with the given ID? if err != nil {
// TODO(random-liu): Do we need to teardown network here?
if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
}
}
return createResp.ID, err
}

View File

@ -17,7 +17,9 @@ limitations under the License.
package dockershim
import (
"errors"
"fmt"
"net"
"os"
"testing"
"time"
@ -203,3 +205,47 @@ func TestHostNetworkPluginInvocation(t *testing.T) {
assert.NoError(t, err)
assert.NoError(t, ds.StopPodSandbox(cID.ID))
}
// TestSetUpPodFailure checks that the sandbox is reported as not ready when
// the network plugin's SetUpPod fails: RunPodSandbox must return an error,
// and both PodSandboxStatus and ListPodSandbox must show SANDBOX_NOTREADY.
func TestSetUpPodFailure(t *testing.T) {
	ds, _, _ := newTestDockerService()
	mockPlugin := newTestNetworkPlugin(t)
	ds.network = network.NewPluginManager(mockPlugin)
	defer mockPlugin.Finish()

	name := "foo0"
	ns := "bar0"
	c := makeSandboxConfigWithLabelsAndAnnotations(
		name, ns, "0", 0,
		map[string]string{"label": name},
		map[string]string{"annotation": ns},
	)
	cID := kubecontainer.ContainerID{Type: runtimeName, ID: dockertools.GetFakeContainerID(fmt.Sprintf("/%v", makeSandboxName(c)))}
	mockPlugin.EXPECT().Name().Return("mockNetworkPlugin").AnyTimes()
	mockPlugin.EXPECT().SetUpPod(ns, name, cID).Return(errors.New("setup pod error")).AnyTimes()
	// Assume network plugin doesn't return error, dockershim should still be able to return not ready correctly.
	// Bug fix: the original used net.IP("127.0.0.01"), which converts the
	// ASCII string bytes into an 11-byte, invalid net.IP (net.IP is a []byte
	// of length 4 or 16) — and the literal itself was a typo. Use
	// net.ParseIP to build a well-formed loopback address.
	mockPlugin.EXPECT().GetPodNetworkStatus(ns, name, cID).Return(&network.PodNetworkStatus{IP: net.ParseIP("127.0.0.1")}, nil).AnyTimes()

	t.Logf("RunPodSandbox should return error")
	_, err := ds.RunPodSandbox(c)
	assert.Error(t, err)

	t.Logf("PodSandboxStatus should be not ready")
	status, err := ds.PodSandboxStatus(cID.ID)
	assert.NoError(t, err)
	assert.Equal(t, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, status.State)

	t.Logf("ListPodSandbox should also show not ready")
	sandboxes, err := ds.ListPodSandbox(nil)
	assert.NoError(t, err)
	var sandbox *runtimeapi.PodSandbox
	for _, s := range sandboxes {
		if s.Id == cID.ID {
			sandbox = s
			break
		}
	}
	assert.NotNil(t, sandbox)
	assert.Equal(t, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, sandbox.State)
}