diff --git a/pkg/kubelet/dockertools/fake_docker_client.go b/pkg/kubelet/dockertools/fake_docker_client.go index 88f68a710a5..e2e99351204 100644 --- a/pkg/kubelet/dockertools/fake_docker_client.go +++ b/pkg/kubelet/dockertools/fake_docker_client.go @@ -71,13 +71,13 @@ func (f *FakeDockerClient) AssertUnorderedCalls(calls []string) (err error) { f.Lock() defer f.Unlock() - expected := make([]string, len(calls)) - actual := make([]string, len(f.called)) - copy(expected, calls) - copy(actual, f.called) + actual := make([]string, len(calls)) + expected := make([]string, len(f.called)) + copy(actual, calls) + copy(expected, f.called) - sort.StringSlice(expected).Sort() sort.StringSlice(actual).Sort() + sort.StringSlice(expected).Sort() if !reflect.DeepEqual(actual, expected) { err = fmt.Errorf("expected(sorted) %#v, got(sorted) %#v", expected, actual) diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index 7ae3f1f42e4..3723253722d 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -30,6 +30,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/capabilities" "github.com/GoogleCloudPlatform/kubernetes/pkg/client/record" kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container" + "github.com/GoogleCloudPlatform/kubernetes/pkg/types" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/fsouza/go-dockerclient" "github.com/golang/glog" @@ -50,6 +51,40 @@ func NewDockerManager(client DockerInterface, recorder record.EventRecorder) *Do return &DockerManager{client: client, recorder: recorder} } +// GetRecentDockerContainersWithNameAndUUID returns a list of dead docker containers which matches the name +// and uid given. +func (self *DockerManager) GetRecentDockerContainersWithNameAndUUID(podFullName string, uid types.UID, + containerName string) ([]*docker.Container, error) { + var result []*docker.Container + containers, err := self.client.ListContainers(docker.ListContainersOptions{All: true}) + if err != nil { + return nil, err + } + for _, dockerContainer := range containers { + if len(dockerContainer.Names) == 0 { + continue + } + dockerName, _, err := ParseDockerName(dockerContainer.Names[0]) + if err != nil { + continue + } + if dockerName.PodFullName != podFullName { + continue + } + if uid != "" && dockerName.PodUID != uid { + continue + } + if dockerName.ContainerName != containerName { + continue + } + inspectResult, _ := self.client.InspectContainer(dockerContainer.ID) + if inspectResult != nil && !inspectResult.State.Running && !inspectResult.State.Paused { + result = append(result, inspectResult) + } + } + return result, nil +} + // GetKubeletDockerContainerLogs returns logs of a specific container. By // default, it returns a snapshot of the container log. Set |follow| to true to // stream the log. Set |follow| to false and specify the number of lines (e.g. diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index da1a595fa85..e5721061470 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -999,33 +999,28 @@ func (kl *Kubelet) makePodDataDirs(pod *api.Pod) error { return nil } -func (kl *Kubelet) shouldContainerBeRestarted(container *api.Container, pod *api.Pod, podStatus *api.PodStatus) bool { +func (kl *Kubelet) shouldContainerBeRestarted(container *api.Container, pod *api.Pod) bool { podFullName := kubecontainer.GetPodFullName(pod) - - // Get all dead container status. - var resultStatus []*api.ContainerStatus - for i, containerStatus := range podStatus.ContainerStatuses { - if containerStatus.Name == container.Name && containerStatus.State.Termination != nil { - resultStatus = append(resultStatus, &podStatus.ContainerStatuses[i]) - } + // Check RestartPolicy for dead container + recentContainers, err := kl.containerManager.GetRecentDockerContainersWithNameAndUUID(podFullName, pod.UID, container.Name) + if err != nil { + glog.Errorf("Error listing recent containers for pod %q: %v", podFullName, err) + // TODO(dawnchen): error handling here? + } + // set dead containers to unready state + for _, c := range recentContainers { + kl.readinessManager.RemoveReadiness(c.ID) } - // Set dead containers to unready state. - for _, c := range resultStatus { - // TODO(yifan): Unify the format of container ID. (i.e. including docker:// as prefix). - kl.readinessManager.RemoveReadiness(strings.TrimPrefix(c.ContainerID, dockertools.DockerPrefix)) - } - - // Check RestartPolicy for dead container. - if len(resultStatus) > 0 { + if len(recentContainers) > 0 { if pod.Spec.RestartPolicy == api.RestartPolicyNever { glog.Infof("Already ran container %q of pod %q, do nothing", container.Name, podFullName) return false + } if pod.Spec.RestartPolicy == api.RestartPolicyOnFailure { - // Check the exit code of last run. Note: This assumes the result is sorted - // by the created time in reverse order. - if resultStatus[0].State.Termination.ExitCode == 0 { + // Check the exit code of last run + if recentContainers[0].State.ExitCode == 0 { glog.Infof("Already successfully ran container %q of pod %q, do nothing", container.Name, podFullName) return false } @@ -1181,7 +1176,7 @@ func (kl *Kubelet) computePodContainerChanges(pod *api.Pod, runningPod kubeconta continue } } else { - if kl.shouldContainerBeRestarted(&container, pod, &podStatus) { + if kl.shouldContainerBeRestarted(&container, pod) { // If we are here it means that the container is dead and sould be restarted, or never existed and should // be created. We may be inserting this ID again if the container has changed and it has // RestartPolicy::Always, but it's not a big deal. diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 5d2966dc421..f0fbe1f354a 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -512,7 +512,7 @@ func TestSyncPodsWithTerminationLog(t *testing.T) { } waitGroup.Wait() verifyCalls(t, fakeDocker, []string{ - "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) + "list", "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) fakeDocker.Lock() parts := strings.Split(fakeDocker.Container.HostConfig.Binds[0], ":") @@ -564,7 +564,7 @@ func TestSyncPodsCreatesNetAndContainer(t *testing.T) { waitGroup.Wait() verifyCalls(t, fakeDocker, []string{ - "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) + "list", "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) fakeDocker.Lock() @@ -619,7 +619,7 @@ func TestSyncPodsCreatesNetAndContainerPullsImage(t *testing.T) { waitGroup.Wait() verifyCalls(t, fakeDocker, []string{ - "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) + "list", "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) fakeDocker.Lock() @@ -671,7 +671,7 @@ func TestSyncPodsWithPodInfraCreatesContainer(t *testing.T) { waitGroup.Wait() verifyCalls(t, fakeDocker, []string{ - "list", "list", "list", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) + "list", "list", "list", "inspect_container", "list", "create", "start", "list", "inspect_container", "inspect_container"}) fakeDocker.Lock() if len(fakeDocker.Created) != 1 || @@ -730,7 +730,7 @@ func TestSyncPodsWithPodInfraCreatesContainerCallsHandler(t *testing.T) { waitGroup.Wait() verifyCalls(t, fakeDocker, []string{ - "list", "list", "list", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"}) + "list", "list", "list", "inspect_container", "list", "create", "start", "list", "inspect_container", "inspect_container"}) fakeDocker.Lock() if len(fakeDocker.Created) != 1 || @@ -1632,7 +1632,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) { t.Errorf("unexpected error: %v", err) } - verifyCalls(t, fakeDocker, []string{"list", "create", "start", "stop", "list"}) + verifyCalls(t, fakeDocker, []string{"list", "list", "create", "start", "stop", "list"}) if len(fakeDocker.Stopped) != 1 { t.Errorf("Wrong containers were stopped: %v", fakeDocker.Stopped) diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go index c194135bf83..d00fba3aa90 100644 --- a/pkg/kubelet/runonce_test.go +++ b/pkg/kubelet/runonce_test.go @@ -81,7 +81,6 @@ func TestRunOnce(t *testing.T) { nodeLister: testNodeLister{}, statusManager: newStatusManager(nil), containerRefManager: kubecontainer.NewRefManager(), - readinessManager: kubecontainer.NewReadinessManager(), } kb.networkPlugin, _ = network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil))