diff --git a/pkg/kubelet/dockertools/docker_manager.go b/pkg/kubelet/dockertools/docker_manager.go
index 51d34f14198..e97f0777c11 100644
--- a/pkg/kubelet/dockertools/docker_manager.go
+++ b/pkg/kubelet/dockertools/docker_manager.go
@@ -1440,21 +1440,22 @@ func (dm *DockerManager) KillPod(pod *v1.Pod, runningPod kubecontainer.Pod, grac
 }
 
 // NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted.
-func (dm *DockerManager) killPodWithSyncResult(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
+// runtimePod may contain either running or exited containers
+func (dm *DockerManager) killPodWithSyncResult(pod *v1.Pod, runtimePod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
 	// Short circuit if there's nothing to kill.
-	if len(runningPod.Containers) == 0 {
+	if len(runtimePod.Containers) == 0 {
 		return
 	}
 	// Send the kills in parallel since they may take a long time.
-	// There may be len(runningPod.Containers) or len(runningPod.Containers)-1 of result in the channel
-	containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))
+	// There may be len(runtimePod.Containers) or len(runtimePod.Containers)-1 of result in the channel
+	containerResults := make(chan *kubecontainer.SyncResult, len(runtimePod.Containers))
 	wg := sync.WaitGroup{}
 	var (
-		networkContainer *kubecontainer.Container
-		networkSpec      *v1.Container
+		networkContainers []*kubecontainer.Container
+		networkSpecs      []*v1.Container
 	)
-	wg.Add(len(runningPod.Containers))
-	for _, container := range runningPod.Containers {
+	wg.Add(len(runtimePod.Containers))
+	for _, container := range runtimePod.Containers {
 		go func(container *kubecontainer.Container) {
 			defer utilruntime.HandleCrash()
 			defer wg.Done()
@@ -1479,21 +1480,20 @@ func (dm *DockerManager) killPodWithSyncResult(pod *v1.Pod, runningPod kubeconta
 
 			// TODO: Handle this without signaling the pod infra container to
 			// adapt to the generic container runtime.
-			if container.Name == PodInfraContainerName {
+			if containerIsNetworked(container.Name) {
 				// Store the container runtime for later deletion.
 				// We do this so that PreStop handlers can run in the network namespace.
-				networkContainer = container
-				networkSpec = containerSpec
-				return
+				networkContainers = append(networkContainers, container)
+				networkSpecs = append(networkSpecs, containerSpec)
+			} else {
+				killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
+				err := dm.KillContainerInPod(container.ID, containerSpec, pod, "Need to kill pod.", gracePeriodOverride)
+				if err != nil {
+					killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
+					glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", container.ID.ID, err, runtimePod.ID)
+				}
+				containerResults <- killContainerResult
 			}
-
-			killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
-			err := dm.KillContainerInPod(container.ID, containerSpec, pod, "Need to kill pod.", gracePeriodOverride)
-			if err != nil {
-				killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
-				glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", container.ID.ID, err, runningPod.ID)
-			}
-			containerResults <- killContainerResult
 		}(container)
 	}
 	wg.Wait()
@@ -1501,27 +1501,37 @@ func (dm *DockerManager) killPodWithSyncResult(pod *v1.Pod, runningPod kubeconta
 	for containerResult := range containerResults {
 		result.AddSyncResult(containerResult)
 	}
-	if networkContainer != nil {
+
+	// Tear down any dead or running network/infra containers, but only kill
+	// those that are still running.
+	for i := range networkContainers {
+		networkContainer := networkContainers[i]
+		networkSpec := networkSpecs[i]
+
+		teardownNetworkResult := kubecontainer.NewSyncResult(kubecontainer.TeardownNetwork, kubecontainer.BuildPodFullName(runtimePod.Name, runtimePod.Namespace))
+		result.AddSyncResult(teardownNetworkResult)
+
 		ins, err := dm.client.InspectContainer(networkContainer.ID.ID)
 		if err != nil {
 			err = fmt.Errorf("Error inspecting container %v: %v", networkContainer.ID.ID, err)
 			glog.Error(err)
-			result.Fail(err)
-			return
+			teardownNetworkResult.Fail(kubecontainer.ErrTeardownNetwork, err.Error())
+			continue
 		}
+
 		if getDockerNetworkMode(ins) != namespaceModeHost {
-			teardownNetworkResult := kubecontainer.NewSyncResult(kubecontainer.TeardownNetwork, kubecontainer.BuildPodFullName(runningPod.Name, runningPod.Namespace))
-			result.AddSyncResult(teardownNetworkResult)
-			if err := dm.network.TearDownPod(runningPod.Namespace, runningPod.Name, networkContainer.ID); err != nil {
+			if err := dm.network.TearDownPod(runtimePod.Namespace, runtimePod.Name, networkContainer.ID); err != nil {
 				teardownNetworkResult.Fail(kubecontainer.ErrTeardownNetwork, err.Error())
 				glog.Error(err)
 			}
 		}
-		killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, networkContainer.Name)
-		result.AddSyncResult(killContainerResult)
-		if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod, "Need to kill pod.", gracePeriodOverride); err != nil {
-			killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
-			glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", networkContainer.ID.ID, err, runningPod.ID)
+		if networkContainer.State == kubecontainer.ContainerStateRunning {
+			killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, networkContainer.Name)
+			result.AddSyncResult(killContainerResult)
+			if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod, "Need to kill pod.", gracePeriodOverride); err != nil {
+				killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
+				glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", networkContainer.ID.ID, err, runtimePod.ID)
+			}
 		}
 	}
 	return
@@ -2150,9 +2160,29 @@ func (dm *DockerManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecon
 		glog.V(4).Infof("Killing Infra Container for %q, will start new one", format.Pod(pod))
 	}
 
+	// Get list of running container(s) to kill
+	podToKill := kubecontainer.ConvertPodStatusToRunningPod(dm.Type(), podStatus)
+
+	// If there are dead network containers, also kill them to ensure
+	// their network resources get released and are available to be
+	// re-used by new net containers
+	for _, containerStatus := range podStatus.ContainerStatuses {
+		if containerIsNetworked(containerStatus.Name) && containerStatus.State == kubecontainer.ContainerStateExited {
+			container := &kubecontainer.Container{
+				ID:      containerStatus.ID,
+				Name:    containerStatus.Name,
+				Image:   containerStatus.Image,
+				ImageID: containerStatus.ImageID,
+				Hash:    containerStatus.Hash,
+				State:   containerStatus.State,
+			}
+			podToKill.Containers = append(podToKill.Containers, container)
+		}
+	}
+
 	// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
 	// TODO(random-liu): We'll use pod status directly in the future
-	killResult := dm.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(dm.Type(), podStatus), nil)
+	killResult := dm.killPodWithSyncResult(pod, podToKill, nil)
 	result.AddPodSyncResult(killResult)
 	if killResult.Error() != nil {
 		return
diff --git a/pkg/kubelet/dockertools/docker_manager_test.go b/pkg/kubelet/dockertools/docker_manager_test.go
index 41b3386532d..da0ca6bf15c 100644
--- a/pkg/kubelet/dockertools/docker_manager_test.go
+++ b/pkg/kubelet/dockertools/docker_manager_test.go
@@ -1828,6 +1828,55 @@ func TestGetPodStatusNoSuchContainer(t *testing.T) {
 	// Verify that we will try to start new contrainers even if the inspections
 	// failed.
 	verifyCalls(t, fakeDocker, []string{
+		// Inspect dead infra container for possible network teardown
+		"inspect_container",
 		// Start a new infra container.
 		"create", "start", "inspect_container", "inspect_container",
 		// Start a new container.
 		"create", "start", "inspect_container",
 	})
 }
+
+func TestSyncPodDeadInfraContainerTeardown(t *testing.T) {
+	const (
+		noSuchContainerID = "nosuchcontainer"
+		infraContainerID  = "9876"
+	)
+	dm, fakeDocker := newTestDockerManager()
+	dm.podInfraContainerImage = "pod_infra_image"
+	ctrl := gomock.NewController(t)
+	defer ctrl.Finish()
+	fnp := nettest.NewMockNetworkPlugin(ctrl)
+	dm.network = network.NewPluginManager(fnp)
+
+	pod := makePod("foo", &v1.PodSpec{
+		Containers: []v1.Container{{Name: noSuchContainerID}},
+	})
+
+	fakeDocker.SetFakeContainers([]*FakeContainer{
+		{
+			ID:         infraContainerID,
+			Name:       "/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_42",
+			ExitCode:   0,
+			StartedAt:  time.Now(),
+			FinishedAt: time.Now(),
+			Running:    false,
+		},
+	})
+
+	// Can be called multiple times due to GetPodStatus
+	fnp.EXPECT().Name().Return("someNetworkPlugin").AnyTimes()
+	fnp.EXPECT().TearDownPod("new", "foo", gomock.Any()).Return(nil)
+	fnp.EXPECT().GetPodNetworkStatus("new", "foo", gomock.Any()).Return(&network.PodNetworkStatus{IP: net.ParseIP("1.1.1.1")}, nil).AnyTimes()
+	fnp.EXPECT().SetUpPod("new", "foo", gomock.Any()).Return(nil)
+
+	runSyncPod(t, dm, fakeDocker, pod, nil, false)
+
+	// Verify that the dead infra container is inspected and torn down before
+	// new containers are started.
+	verifyCalls(t, fakeDocker, []string{
+		// Inspect dead infra container for possible network teardown
+		"inspect_container",
+		// Start a new infra container.
+		"create", "start", "inspect_container", "inspect_container",
+		// Start a new container.
+		"create", "start", "inspect_container",
+	})
+}
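
Note: the hunks above replace the direct "container.Name == PodInfraContainerName" comparison with a call to containerIsNetworked, whose definition is not part of this excerpt. Judging from the line it replaces, the helper presumably reduces to a name match; a minimal sketch under that assumption:

// Sketch only: containerIsNetworked is called by the hunks above but not
// defined in this excerpt. Assuming it simply identifies the pod infra
// ("POD") container by name, mirroring the comparison it replaced:
func containerIsNetworked(containerName string) bool {
	return containerName == PodInfraContainerName
}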
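
Note: the SyncPod hunk builds podToKill by hand because kubecontainer.ConvertPodStatusToRunningPod keeps only running containers (hence the "Get list of running container(s) to kill" comment), so an exited infra container would otherwise never reach killPodWithSyncResult for network teardown. A simplified paraphrase of that helper, not the verbatim implementation, to make the filtering explicit:

// Paraphrased sketch of kubecontainer.ConvertPodStatusToRunningPod: any
// container whose state is not ContainerStateRunning is dropped, which is
// why SyncPod above appends dead network containers to podToKill itself.
func convertPodStatusToRunningPod(runtimeName string, podStatus *kubecontainer.PodStatus) kubecontainer.Pod {
	runningPod := kubecontainer.Pod{
		ID:        podStatus.ID,
		Name:      podStatus.Name,
		Namespace: podStatus.Namespace,
	}
	for _, containerStatus := range podStatus.ContainerStatuses {
		if containerStatus.State != kubecontainer.ContainerStateRunning {
			continue // exited containers, including dead infra containers, are skipped
		}
		runningPod.Containers = append(runningPod.Containers, &kubecontainer.Container{
			ID:    containerStatus.ID,
			Name:  containerStatus.Name,
			State: containerStatus.State,
		})
	}
	return runningPod
}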